@@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
123123 * LINE TABLE PARSING FUNCTIONS
124124 * ============================================================================ */
125125
// Bounds-checked single-byte read.
//
// Copies the byte at *ptr into *out and advances *ptr by one, but only when
// *ptr is strictly below `end` (one past the last readable byte).
// Returns 0 on success; returns -1 and leaves *ptr and *out untouched when
// no byte is available.
static inline int
read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
{
    if (*ptr >= end) {
        return -1;
    }
    *out = *(*ptr)++;
    return 0;
}

// Decode an unsigned varint starting at *ptr, never reading at or past `end`.
//
// Each byte contributes its low 6 bits (least-significant chunk first); bit 6
// (value 64) signals that another byte follows. *ptr is advanced past every
// byte consumed.
// Returns the decoded value (always >= 0), or -1 when the buffer ends before
// the varint does or when the encoding is longer than 5 bytes.
static int
scan_varint(const uint8_t **ptr, const uint8_t *end)
{
    // Largest shift accepted: 5 chunks of 6 bits = 30 payload bits, so the
    // result stays a non-negative int (assuming int is at least 32 bits).
    enum { VARINT_MAX_SHIFT = 24 };

    uint8_t read;
    if (read_byte(ptr, end, &read) < 0) {
        return -1;
    }
    unsigned int val = read & 63u;
    unsigned int shift = 0;
    while (read & 64) {
        if (read_byte(ptr, end, &read) < 0) {
            return -1;
        }
        shift += 6;
        // The loop itself is bounded by `end`; this guard rejects over-long
        // encodings so the shift below can neither exceed the width of the
        // type nor push the value past what the int return can represent.
        if (shift > VARINT_MAX_SHIFT) {
            return -1;
        }
        // Shift as unsigned so the chunk never lands in a sign bit.
        val |= (unsigned int)(read & 63u) << shift;
    }
    return (int)val;
}

// Decode a signed varint starting at *ptr, never reading at or past `end`.
//
// The underlying unsigned varint stores the sign in bit 0 and the magnitude
// in the remaining bits.
// Returns the decoded value, or INT_MIN when scan_varint() fails. INT_MIN is
// a safe sentinel: scan_varint() yields at most 30 bits, so a successfully
// decoded magnitude is far smaller than INT_MIN's.
static int
scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
{
    int uval = scan_varint(ptr, end);
    if (uval < 0) {
        return INT_MIN;
    }
    int magnitude = (int)((unsigned int)uval >> 1);
    if (uval & 1) {
        return -magnitude;
    }
    return magnitude;
}
153174
154175bool
155- parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
176+ parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
177+ int firstlineno, LocationInfo* info)
156178{
179+ // Reject garbage: zero or negative size
180+ if (linetable_size <= 0) {
181+ return false;
182+ }
183+
157184 const uint8_t* ptr = (const uint8_t*)(linetable);
185+ const uint8_t* end = ptr + linetable_size;
158186 uintptr_t addr = 0;
159187 int computed_line = firstlineno; // Running accumulator, separate from output
188+ int val; // Temporary for varint results
189+ uint8_t byte; // Temporary for byte reads
160190 const size_t MAX_LINETABLE_ENTRIES = 65536;
161191 size_t entry_count = 0;
162192
163- while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
193+ while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
164194 entry_count++;
165195 uint8_t first_byte = *(ptr++);
166196 uint8_t code = (first_byte >> 3) & 15;
@@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
173203 info->column = info->end_column = -1;
174204 break;
175205 case PY_CODE_LOCATION_INFO_LONG:
176- computed_line += scan_signed_varint(&ptr);
206+ val = scan_signed_varint(&ptr, end);
207+ if (val == INT_MIN) {
208+ return false;
209+ }
210+ computed_line += val;
177211 info->lineno = computed_line;
178- info->end_lineno = computed_line + scan_varint(&ptr);
179- info->column = scan_varint(&ptr) - 1;
180- info->end_column = scan_varint(&ptr) - 1;
212+ val = scan_varint(&ptr, end);
213+ if (val < 0) {
214+ return false;
215+ }
216+ info->end_lineno = computed_line + val;
217+ val = scan_varint(&ptr, end);
218+ if (val < 0) {
219+ return false;
220+ }
221+ info->column = val - 1;
222+ val = scan_varint(&ptr, end);
223+ if (val < 0) {
224+ return false;
225+ }
226+ info->end_column = val - 1;
181227 break;
182228 case PY_CODE_LOCATION_INFO_NO_COLUMNS:
183- computed_line += scan_signed_varint(&ptr);
229+ val = scan_signed_varint(&ptr, end);
230+ if (val == INT_MIN) {
231+ return false;
232+ }
233+ computed_line += val;
184234 info->lineno = info->end_lineno = computed_line;
185235 info->column = info->end_column = -1;
186236 break;
@@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
189239 case PY_CODE_LOCATION_INFO_ONE_LINE2:
190240 computed_line += code - 10;
191241 info->lineno = info->end_lineno = computed_line;
192- info->column = *(ptr++);
193- info->end_column = *(ptr++);
242+ if (read_byte(&ptr, end, &byte) < 0) {
243+ return false;
244+ }
245+ info->column = byte;
246+ if (read_byte(&ptr, end, &byte) < 0) {
247+ return false;
248+ }
249+ info->end_column = byte;
194250 break;
195251 default: {
196- uint8_t second_byte = *(ptr++);
197- if ((second_byte & 128) != 0) {
252+ if (read_byte(&ptr, end, &byte) < 0) {
253+ return false;
254+ }
255+ if ((byte & 128) != 0) {
198256 return false;
199257 }
200258 info->lineno = info->end_lineno = computed_line;
201- info->column = code << 3 | (second_byte >> 4);
202- info->end_column = info->column + (second_byte & 15);
259+ info->column = code << 3 | (byte >> 4);
260+ info->end_column = info->column + (byte & 15);
203261 break;
204262 }
205263 }
@@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder,
384442 tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
385443 }
386444
387- if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
388- assert(ctx->tlbc_index >= 0);
445+ // Validate tlbc_index and check TLBC cache
446+ if (tlbc_entry) {
447+ // Validate index bounds (also catches negative values since tlbc_index is signed)
448+ if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
449+ set_exception_cause(unwinder, PyExc_RuntimeError,
450+ "Invalid tlbc_index (corrupted remote memory)");
451+ goto error;
452+ }
389453 assert(tlbc_entry->tlbc_array_size > 0);
390454 // Use cached TLBC data
391455 uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
@@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
398462 }
399463 }
400464
401- // Fall back to main bytecode
465+ // Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
402466 addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
403467
404468done_tlbc:
@@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
409473 ; // Empty statement to avoid C23 extension warning
410474 LocationInfo info = {0};
411475 bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
476+ PyBytes_GET_SIZE(meta->linetable),
412477 meta->first_lineno, &info);
413478 if (!ok) {
414479 info.lineno = -1;
0 commit comments