@@ -599,8 +599,9 @@ add_to_trace(
599599/* Branch penalty: 0 if fully biased, FITNESS_BRANCH_BALANCED if 50/50,
600600 * 2*FITNESS_BRANCH_BALANCED if fully against the traced direction. */
601601static inline int
602- compute_branch_penalty (uint16_t history , bool branch_taken )
602+ compute_branch_penalty (uint16_t history )
603603{
604+ bool branch_taken = history & 1 ;
604605 int taken_count = _Py_popcount32 ((uint32_t )history );
605606 int on_trace_count = branch_taken ? taken_count : 16 - taken_count ;
606607 int off_trace = 16 - on_trace_count ;
@@ -811,10 +812,8 @@ _PyJit_translate_single_bytecode_to_trace(
811812 goto done ;
812813 }
813814
814- // Snapshot the buffer before reserving tail slots. The later charge
815- // includes both emitted uops and capacity reserved for exits/deopts/errors.
815+ // Snapshot the buffer position before emitting uops for this bytecode.
816816 _PyUOpInstruction * next_before = trace -> next ;
817- _PyUOpInstruction * end_before = trace -> end ;
818817
819818 // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
820819 trace -> end -= 2 ;
@@ -864,7 +863,7 @@ _PyJit_translate_single_bytecode_to_trace(
864863 assert (jump_happened ? (next_instr == computed_jump_instr ) : (next_instr == computed_next_instr ));
865864 uint32_t uopcode = BRANCH_TO_GUARD [opcode - POP_JUMP_IF_FALSE ][jump_happened ];
866865 ADD_TO_TRACE (uopcode , 0 , 0 , INSTR_IP (jump_happened ? computed_next_instr : computed_jump_instr , old_code ));
867- int bp = compute_branch_penalty (target_instr [1 ].cache , jump_happened );
866+ int bp = compute_branch_penalty (target_instr [1 ].cache );
868867 tracer -> translator_state .fitness -= bp ;
869868 DPRINTF (3 , " branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n" ,
870869 bp , target_instr [1 ].cache , jump_happened ,
@@ -1057,16 +1056,12 @@ _PyJit_translate_single_bytecode_to_trace(
10571056 ADD_TO_TRACE (_JUMP_TO_TOP , 0 , 0 , 0 );
10581057 goto done ;
10591058 }
1060- // Charge fitness by trace-buffer capacity consumed for this bytecode,
1061- // including both emitted uops and tail reservations.
1059+ // Charge fitness by the number of uops actually emitted for this bytecode.
10621060 {
1063- int32_t slots_fwd = (int32_t )(trace -> next - next_before );
1064- int32_t slots_rev = (int32_t )(end_before - trace -> end );
1065- int32_t slots_used = slots_fwd + slots_rev ;
1061+ int32_t slots_used = (int32_t )(trace -> next - next_before );
10661062 tracer -> translator_state .fitness -= slots_used ;
1067- DPRINTF (3 , " per-insn cost: -%d (fwd=%d, rev=%d) -> fitness=%d\n" ,
1068- slots_used , slots_fwd , slots_rev ,
1069- tracer -> translator_state .fitness );
1063+ DPRINTF (3 , " per-insn cost: -%d -> fitness=%d\n" ,
1064+ slots_used , tracer -> translator_state .fitness );
10701065 }
10711066 DPRINTF (2 , "Trace continuing (fitness=%d)\n" , tracer -> translator_state .fitness );
10721067 return 1 ;
0 commit comments