Skip to content

Commit 738696f

Browse files
Revert "pythongh-143421: Move JitOptContext from stack allocation to per-thread heap allocation (pythonGH-143536)"
This reverts commit aeb3403.
1 parent 234a15d commit 738696f

6 files changed

Lines changed: 147 additions & 150 deletions

File tree

Include/internal/pycore_optimizer.h

Lines changed: 123 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ extern "C" {
1212
#include "pycore_uop.h" // _PyUOpInstruction
1313
#include "pycore_uop_ids.h"
1414
#include "pycore_stackref.h" // _PyStackRef
15-
#include "pycore_optimizer_types.h"
1615
#include <stdbool.h>
1716

1817

@@ -85,7 +84,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
8584
#define JIT_CLEANUP_THRESHOLD 1000
8685

8786
int _Py_uop_analyze_and_optimize(
88-
_PyThreadStateImpl *tstate,
87+
PyFunctionObject *func,
8988
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
9089
_PyBloomFilter *dependencies);
9190

@@ -113,6 +112,86 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
113112
return inst->error_target;
114113
}
115114

115+
// Holds locals, stack, locals, stack ... co_consts (in that order)
116+
#define MAX_ABSTRACT_INTERP_SIZE 4096
117+
118+
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
119+
120+
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
121+
#define MAX_ABSTRACT_FRAME_DEPTH (16)
122+
123+
// The maximum number of side exits that we can take before requiring forward
124+
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
125+
// is the "maximum amount of polymorphism" that an isolated trace tree can
126+
// handle before rejoining the rest of the program.
127+
#define MAX_CHAIN_DEPTH 4
128+
129+
/* Symbols */
130+
/* See explanation in optimizer_symbols.c */
131+
132+
133+
// Kinds of JIT optimizer symbol, numbered 1 through 10. No enumerator has
// the value 0.
// NOTE(review): presumably these are the values stored in the `tag` byte of
// the JitOpt* variants declared after this enum -- confirm against
// optimizer_symbols.c. JIT_SYM_COMPACT_INT is the only enumerator without
// the `_TAG` suffix.
typedef enum _JitSymType {
134+
JIT_SYM_UNKNOWN_TAG = 1,
135+
JIT_SYM_NULL_TAG = 2,
136+
JIT_SYM_NON_NULL_TAG = 3,
137+
JIT_SYM_BOTTOM_TAG = 4,
138+
JIT_SYM_TYPE_VERSION_TAG = 5,
139+
JIT_SYM_KNOWN_CLASS_TAG = 6,
140+
JIT_SYM_KNOWN_VALUE_TAG = 7,
141+
JIT_SYM_TUPLE_TAG = 8,
142+
JIT_SYM_TRUTHINESS_TAG = 9,
143+
JIT_SYM_COMPACT_INT = 10,
144+
} JitSymType;
145+
146+
// Symbol variant that carries both a 32-bit version number and a type
// pointer.
// NOTE(review): presumably selected by JIT_SYM_KNOWN_CLASS_TAG -- confirm.
typedef struct _jit_opt_known_class {
147+
uint8_t tag;  // first member, as in every other symbol variant
148+
uint32_t version;
149+
PyTypeObject *type;  // NOTE(review): reference ownership is not visible here
150+
} JitOptKnownClass;
151+
152+
// Symbol variant that carries only a 32-bit version number (no type pointer).
// NOTE(review): presumably selected by JIT_SYM_TYPE_VERSION_TAG -- confirm.
typedef struct _jit_opt_known_version {
153+
uint8_t tag;  // first member, as in every other symbol variant
154+
uint32_t version;
155+
} JitOptKnownVersion;
156+
157+
// Symbol variant that carries a pointer to a concrete Python object.
// NOTE(review): presumably selected by JIT_SYM_KNOWN_VALUE_TAG -- confirm.
typedef struct _jit_opt_known_value {
158+
uint8_t tag;  // first member, as in every other symbol variant
159+
PyObject *value;  // NOTE(review): strong vs. borrowed reference is not visible here
160+
} JitOptKnownValue;
161+
162+
#define MAX_SYMBOLIC_TUPLE_SIZE 7
163+
164+
// Symbol variant describing a tuple with at most MAX_SYMBOLIC_TUPLE_SIZE
// tracked items.
typedef struct _jit_opt_tuple {
165+
uint8_t tag;  // first member, as in every other symbol variant
166+
uint8_t length;  // NOTE(review): presumably the number of used entries in `items` -- confirm
167+
uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];  // 16-bit entries; presumably indices of item symbols -- confirm
168+
} JitOptTuple;
169+
170+
// Symbol variant holding a boolean `invert` flag and a 16-bit `value`.
// NOTE(review): presumably `value` identifies the symbol whose truthiness is
// tracked and `invert` negates it -- confirm in optimizer_symbols.c.
typedef struct {
171+
uint8_t tag;  // first member, as in every other symbol variant
172+
bool invert;
173+
uint16_t value;
174+
} JitOptTruthiness;
175+
176+
// Symbol variant with no payload: the tag byte is its only member.
typedef struct {
177+
uint8_t tag;
178+
} JitOptCompactInt;
179+
180+
// One optimizer symbol: a union over all the variant structs.
// Every variant struct declares `uint8_t tag` as its first member, so the
// tag byte sits at the same position as this union's bare `tag` member.
typedef union _jit_opt_symbol {
181+
uint8_t tag;
182+
JitOptKnownClass cls;
183+
JitOptKnownValue value;
184+
JitOptKnownVersion version;
185+
JitOptTuple tuple;
186+
JitOptTruthiness truthiness;
187+
JitOptCompactInt compact;
188+
} JitOptSymbol;
189+
190+
191+
// This mimics the _PyStackRef API
192+
// Pointer-sized handle with a single `bits` member. PyJitRef_IsBorrowed()
// tests the REF_IS_BORROWED bit (value 1) of `bits`.
typedef union {
193+
uintptr_t bits;
194+
} JitOptRef;
116195

117196
#define REF_IS_BORROWED 1
118197

@@ -159,6 +238,48 @@ PyJitRef_IsBorrowed(JitOptRef ref)
159238
return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
160239
}
161240

241+
// Per-frame state kept by the optimizer: the function and code object, the
// sizes of the locals and stack areas, and JitOptRef pointers for them.
// JitOptContext embeds an array of these (`frames`) and points at the
// current one (`frame`).
struct _Py_UOpsAbstractFrame {
242+
bool globals_watched;
243+
// The version number of the globals dicts, once checked. 0 if unchecked.
244+
uint32_t globals_checked_version;
245+
// Max stacklen
246+
int stack_len;
247+
int locals_len;
248+
PyFunctionObject *func;
249+
PyCodeObject *code;
250+
251+
JitOptRef *stack_pointer;  // NOTE(review): presumably the current top of `stack` -- confirm
252+
JitOptRef *stack;  // NOTE(review): presumably points into JitOptContext.locals_and_stack -- confirm
253+
JitOptRef *locals;  // NOTE(review): presumably points into JitOptContext.locals_and_stack -- confirm
254+
};
255+
256+
typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
257+
258+
// Fixed-capacity storage for JitOptSymbol values: TY_ARENA_SIZE entries
// embedded inline, plus two int counters.
typedef struct ty_arena {
259+
int ty_curr_number;  // NOTE(review): presumably the next unused index in `arena` -- confirm
260+
int ty_max_number;  // NOTE(review): presumably the capacity limit -- confirm
261+
JitOptSymbol arena[TY_ARENA_SIZE];
262+
} ty_arena;
263+
264+
// Working state for one run of the uop optimizer.
// With this revert, optimize_uops() declares it as a local variable
// (`JitOptContext context;`) instead of using a member of the thread state.
// All storage is embedded inline: MAX_ABSTRACT_FRAME_DEPTH frames, the
// symbol arena, and MAX_ABSTRACT_INTERP_SIZE JitOptRef slots.
typedef struct _JitOptContext {
265+
char done;
266+
char out_of_space;
267+
bool contradiction;
268+
// Has the builtins dict been watched?
269+
bool builtins_watched;
270+
// The current "executing" frame.
271+
_Py_UOpsAbstractFrame *frame;
272+
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
273+
int curr_frame_depth;
274+
275+
// Arena for the symbolic types.
276+
ty_arena t_arena;
277+
278+
JitOptRef *n_consumed;  // NOTE(review): presumably a cursor into `locals_and_stack` -- confirm
279+
JitOptRef *limit;  // NOTE(review): presumably the end bound of `locals_and_stack` -- confirm
280+
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
281+
} JitOptContext;
282+
162283
extern bool _Py_uop_sym_is_null(JitOptRef sym);
163284
extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
164285
extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);

Include/internal/pycore_optimizer_types.h

Lines changed: 0 additions & 137 deletions
This file was deleted.

Include/internal/pycore_tstate.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ extern "C" {
1212
#include "pycore_freelist_state.h" // struct _Py_freelists
1313
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
1414
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
15-
#include "pycore_optimizer_types.h" // JitOptContext
1615
#include "pycore_qsbr.h" // struct qsbr
1716
#include "pycore_uop.h" // struct _PyUOpInstruction
1817
#include "pycore_structs.h"
@@ -53,11 +52,10 @@ typedef struct _PyJitTracerTranslatorState {
5352
} _PyJitTracerTranslatorState;
5453

5554
typedef struct _PyJitTracerState {
55+
_PyUOpInstruction *code_buffer;
5656
_PyJitTracerInitialState initial_state;
5757
_PyJitTracerPreviousState prev_state;
5858
_PyJitTracerTranslatorState translator_state;
59-
JitOptContext opt_context;
60-
_PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
6159
} _PyJitTracerState;
6260

6361
#endif

Python/optimizer.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1025,6 +1025,13 @@ _PyJit_TryInitializeTracing(
10251025
if (oparg > 0xFFFF) {
10261026
return 0;
10271027
}
1028+
if (_tstate->jit_tracer_state.code_buffer == NULL) {
1029+
_tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
1030+
if (_tstate->jit_tracer_state.code_buffer == NULL) {
1031+
// Don't error, just go to next instruction.
1032+
return 0;
1033+
}
1034+
}
10281035
PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
10291036
if (func == NULL) {
10301037
return 0;
@@ -1477,8 +1484,8 @@ uop_optimize(
14771484
OPT_STAT_INC(traces_created);
14781485
if (!is_noopt) {
14791486
length = _Py_uop_analyze_and_optimize(
1480-
_tstate,
1481-
buffer, length,
1487+
_tstate->jit_tracer_state.initial_state.func,
1488+
buffer,length,
14821489
curr_stackentries, dependencies);
14831490
if (length <= 0) {
14841491
return length;

Python/optimizer_analysis.c

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
#include "pycore_opcode_metadata.h"
1919
#include "pycore_opcode_utils.h"
2020
#include "pycore_pystate.h" // _PyInterpreterState_GET()
21-
#include "pycore_tstate.h" // _PyThreadStateImpl
2221
#include "pycore_uop_metadata.h"
2322
#include "pycore_long.h"
2423
#include "pycore_interpframe.h" // _PyFrame_GetCode
@@ -335,17 +334,17 @@ _Py_opt_assert_within_stack_bounds(
335334
/* >0 (length) for success, 0 for not ready, clears all possible errors. */
336335
static int
337336
optimize_uops(
338-
_PyThreadStateImpl *tstate,
337+
PyFunctionObject *func,
339338
_PyUOpInstruction *trace,
340339
int trace_len,
341340
int curr_stacklen,
342341
_PyBloomFilter *dependencies
343342
)
344343
{
345344
assert(!PyErr_Occurred());
346-
PyFunctionObject *func = tstate->jit_tracer_state.initial_state.func;
347345

348-
JitOptContext *ctx = &tstate->jit_tracer_state.opt_context;
346+
JitOptContext context;
347+
JitOptContext *ctx = &context;
349348
uint32_t opcode = UINT16_MAX;
350349

351350
// Make sure that watchers are set up
@@ -575,7 +574,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
575574
// > 0 - length of optimized trace
576575
int
577576
_Py_uop_analyze_and_optimize(
578-
_PyThreadStateImpl *tstate,
577+
PyFunctionObject *func,
579578
_PyUOpInstruction *buffer,
580579
int length,
581580
int curr_stacklen,
@@ -585,7 +584,7 @@ _Py_uop_analyze_and_optimize(
585584
OPT_STAT_INC(optimizer_attempts);
586585

587586
length = optimize_uops(
588-
tstate, buffer,
587+
func, buffer,
589588
length, curr_stacklen, dependencies);
590589

591590
if (length == 0) {

0 commit comments

Comments
 (0)