Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions internal/cbm/lsp/go_lsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,22 @@
#include <stdlib.h>

// Forward declarations
static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node);
static void resolve_calls_in_node_inner(GoLSPContext* ctx, TSNode node);

/* Recursion-limited front door for the Go call-resolution AST walk.
 *
 * ctx->walk_depth counts how many invocations of this wrapper are currently
 * active. When that count has reached cbm_lsp_max_walk_depth() (the
 * CBM_LSP_MAX_WALK_DEPTH default, or the environment override of the same
 * name) the subtree rooted at `node` is skipped: its calls stay unresolved,
 * which is preferable to overflowing the native stack (SIGSEGV) on a
 * deeply-nested file and losing the whole index.
 *
 * The counter is decremented here, after the inner walker has returned, so no
 * early return inside the inner body can leave it unbalanced. */
static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node) {
    const int depth_cap = cbm_lsp_max_walk_depth();
    if (ctx->walk_depth < depth_cap) {
        ctx->walk_depth += 1;
        resolve_calls_in_node_inner(ctx, node);
        ctx->walk_depth -= 1;
    }
}
static void emit_resolved_call(GoLSPContext* ctx, const char* callee_qn, const char* strategy, float confidence);
static const CBMType* go_lookup_field(GoLSPContext* ctx, const char* type_qn, const char* field_name, int depth);
static void extract_type_params_from_ast(CBMArena* arena, CBMTypeRegistry* reg,
Expand Down Expand Up @@ -1108,7 +1123,7 @@ static void emit_unresolved_call(GoLSPContext* ctx, const char* expr_text, const

// --- Walk call expressions and resolve them ---

static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node) {
static void resolve_calls_in_node_inner(GoLSPContext* ctx, TSNode node) {
if (ts_node_is_null(node)) return;
const char* kind = ts_node_type(node);

Expand Down
4 changes: 4 additions & 0 deletions internal/cbm/lsp/go_lsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ typedef struct {
// Output: resolved calls accumulate here
CBMResolvedCallArray* resolved_calls;

// AST-walk recursion depth for resolve_calls_in_node (guards stack overflow
// on deeply-nested/cyclic files; see cbm_lsp_max_walk_depth). Zero via memset.
int walk_depth;

// Debug mode (CBM_LSP_DEBUG env)
bool debug;
} GoLSPContext;
Expand Down
19 changes: 17 additions & 2 deletions internal/cbm/lsp/kotlin_lsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,22 @@

/* ── forward declarations ─────────────────────────────────────────── */

static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node);
static void kt_resolve_calls_in_node_inner(KotlinLSPContext *ctx, TSNode node);

/* Recursion-limited front door for the Kotlin call-resolution AST walk.
 *
 * ctx->walk_depth records how many invocations of this wrapper are currently
 * on the stack. Once it has reached cbm_lsp_max_walk_depth() (the
 * CBM_LSP_MAX_WALK_DEPTH default, or the environment override of the same
 * name) the subtree rooted at `node` is not visited: its calls stay
 * unresolved, which is graceful degradation compared with a native stack
 * overflow (SIGSEGV) that would take down the whole index.
 *
 * The matching decrement lives in this wrapper, after the inner walker
 * returns, so early returns in the inner body never leak the counter. */
static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node) {
    if (ctx->walk_depth < cbm_lsp_max_walk_depth()) {
        ++ctx->walk_depth;
        kt_resolve_calls_in_node_inner(ctx, node);
        --ctx->walk_depth;
    }
}
static void kt_process_class_decl(KotlinLSPContext *ctx, TSNode node);
static void kt_process_object_decl(KotlinLSPContext *ctx, TSNode node, bool is_companion,
const char *outer_class_qn);
Expand Down Expand Up @@ -3174,7 +3189,7 @@ static void kt_process_statement(KotlinLSPContext *ctx, TSNode stmt) {
/* Generic walker that fires call resolution on every call_expression in
* a subtree, *without* descending into nested function/class bodies (those
* are processed separately with their own scope). */
static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node) {
static void kt_resolve_calls_in_node_inner(KotlinLSPContext *ctx, TSNode node) {
if (ts_node_is_null(node)) {
return;
}
Expand Down
5 changes: 5 additions & 0 deletions internal/cbm/lsp/kotlin_lsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ typedef struct KotlinLSPContext {
/* Recursion guard for kotlin_eval_expr_type. */
int eval_depth;

/* AST-walk recursion depth for kt_resolve_calls_in_node (guards stack
* overflow on deeply-nested/cyclic files; see cbm_lsp_max_walk_depth).
* Zero via memset. */
int walk_depth;

/* Debug mode (CBM_LSP_DEBUG env). */
bool debug;
} KotlinLSPContext;
Expand Down
19 changes: 17 additions & 2 deletions internal/cbm/lsp/php_lsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,22 @@
extern const TSLanguage *tree_sitter_php_only(void);

/* Forward decls */
static void php_resolve_calls_in_node(PHPLSPContext *ctx, TSNode node);
static void php_resolve_calls_in_node_inner(PHPLSPContext *ctx, TSNode node);

/* Recursion-limited front door for the PHP call-resolution AST walk.
 *
 * The wrapper compares ctx->walk_depth (the number of currently active
 * invocations) against cbm_lsp_max_walk_depth() — CBM_LSP_MAX_WALK_DEPTH by
 * default, overridable through the environment variable of the same name.
 * At or past the cap the subtree rooted at `node` is skipped, leaving its
 * calls unresolved rather than risking a native stack overflow (SIGSEGV) on a
 * deeply-nested file, which would abort the whole index.
 *
 * Increment and decrement bracket the inner call here, so an early return
 * inside the inner walker cannot leave the counter unbalanced. */
static void php_resolve_calls_in_node(PHPLSPContext *ctx, TSNode node) {
    const int depth_cap = cbm_lsp_max_walk_depth();
    const int at_cap = (ctx->walk_depth >= depth_cap);
    if (at_cap)
        return;

    ctx->walk_depth += 1;
    php_resolve_calls_in_node_inner(ctx, node);
    ctx->walk_depth -= 1;
}
static void process_function_like(PHPLSPContext *ctx, TSNode node);
static void process_class_decl(PHPLSPContext *ctx, TSNode node);
static const CBMType *php_substitute_template(CBMArena *arena, const CBMType *t,
Expand Down Expand Up @@ -2110,7 +2125,7 @@ static void process_if_statement(PHPLSPContext *ctx, TSNode node) {
}

/* Walk a subtree, binding scope and resolving calls. */
static void php_resolve_calls_in_node(PHPLSPContext *ctx, TSNode node) {
static void php_resolve_calls_in_node_inner(PHPLSPContext *ctx, TSNode node) {
if (ts_node_is_null(node))
return;
const char *kind = ts_node_type(node);
Expand Down
5 changes: 5 additions & 0 deletions internal/cbm/lsp/php_lsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ typedef struct {
/* Recursion guard for php_eval_expr_type. */
int eval_depth;

/* AST-walk recursion depth for php_resolve_calls_in_node (guards stack
* overflow on deeply-nested/cyclic files; see cbm_lsp_max_walk_depth).
* Zero via memset. */
int walk_depth;

/* Debug mode (CBM_LSP_DEBUG env). */
bool debug;
} PHPLSPContext;
Expand Down
19 changes: 17 additions & 2 deletions internal/cbm/lsp/py_lsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,22 @@
#include "py_builtins.c"

// Forward decls
static void py_resolve_calls_in(PyLSPContext *ctx, TSNode node);
static void py_resolve_calls_in_inner(PyLSPContext *ctx, TSNode node);

/* Depth-guarded entry for the Python AST call-resolution walk.
 *
 * ctx->walk_depth counts the currently active invocations of this wrapper.
 * Once it has reached cbm_lsp_max_walk_depth() (the CBM_LSP_MAX_WALK_DEPTH
 * default, or the environment override of the same name) the subtree rooted at
 * `node` is skipped — its calls stay unresolved, which is graceful
 * degradation rather than a native stack overflow (SIGSEGV) that takes down
 * the whole index.
 *
 * The walk_depth-- runs after the inner returns, so early returns in the body
 * never leak the counter.
 *
 * A NULL ctx is a no-op: the inner walker already tolerates it, so the wrapper
 * must not dereference ctx before that check can run. */
static void py_resolve_calls_in(PyLSPContext *ctx, TSNode node) {
    /* Preserve the inner walker's NULL-ctx tolerance; without this guard the
     * depth check below would dereference a NULL pointer. */
    if (!ctx)
        return;
    if (ctx->walk_depth >= cbm_lsp_max_walk_depth())
        return;
    ctx->walk_depth++;
    py_resolve_calls_in_inner(ctx, node);
    ctx->walk_depth--;
}
static const CBMType *py_eval_expr_type(PyLSPContext *ctx, TSNode node);
static void py_process_statement(PyLSPContext *ctx, TSNode node);
static const CBMRegisteredFunc *py_lookup_attribute(PyLSPContext *ctx, const char *type_qn,
Expand Down Expand Up @@ -2199,7 +2214,7 @@ static void py_emit_dunder_call(PyLSPContext *ctx, const CBMType *recv, const ch
}
}

static void py_resolve_calls_in(PyLSPContext *ctx, TSNode node) {
static void py_resolve_calls_in_inner(PyLSPContext *ctx, TSNode node) {
if (!ctx || ts_node_is_null(node))
return;
const char *k = ts_node_type(node);
Expand Down
4 changes: 4 additions & 0 deletions internal/cbm/lsp/py_lsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ typedef struct {
int dict_literal_count;
int dict_literal_cap;

// AST-walk recursion depth for py_resolve_calls_in (guards stack overflow on
// deeply-nested/cyclic files; see cbm_lsp_max_walk_depth). Zero via memset.
int walk_depth;

// Debug mode (CBM_LSP_DEBUG env, shared across all language LSPs).
bool debug;
} PyLSPContext;
Expand Down
25 changes: 25 additions & 0 deletions internal/cbm/lsp/scope.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "type_rep.h"
#include "../arena.h"
#include <limits.h> /* INT_MAX (cbm_lsp_max_walk_depth) */
#include <stdlib.h> /* getenv, atoi (cbm_lsp_max_walk_depth) */

typedef struct {
const char* name;
Expand All @@ -28,6 +29,30 @@ typedef struct CBMScope {
// rather than recursing — guards against pathological hierarchies.
#define CBM_LSP_MAX_LOOKUP_DEPTH 16

// Recursion cap for the per-language "resolve calls in AST node" walkers. These
// recurse once per AST nesting level; a deeply-nested file can drive them into
// a native stack overflow (SIGSEGV) that takes down the whole index. Past this
// cap the wrapper skips the subtree — those calls stay unresolved, which is
// graceful degradation, not a crash. 512 is far deeper than any hand-written
// source nests; override for pathological/generated repos via the
// CBM_LSP_MAX_WALK_DEPTH env var (positive integer).
#define CBM_LSP_MAX_WALK_DEPTH 512

// Resolved walk-depth cap.
//
// Returns the value of the CBM_LSP_MAX_WALK_DEPTH environment variable when it
// parses to a positive integer, otherwise the CBM_LSP_MAX_WALK_DEPTH default.
// Parsing uses strtol (base 10): leading whitespace and trailing characters
// are tolerated, as before. Values larger than INT_MAX — including inputs
// that overflow long — are clamped to INT_MAX instead of being handed to
// atoi, whose behaviour is undefined when the result does not fit in an int.
// Zero, negative, empty and non-numeric values fall back to the default.
//
// Read once and cached — the walkers call this per node, so it must not hit
// getenv on the hot path. The cache is unsynchronized; under multi-threaded
// indexing every thread computes the same value.
static inline int cbm_lsp_max_walk_depth(void) {
    static int cached = -1;
    if (cached < 0) {
        int cap = CBM_LSP_MAX_WALK_DEPTH;
        const char* e = getenv("CBM_LSP_MAX_WALK_DEPTH");
        if (e && *e) {
            // strtol saturates at LONG_MAX/LONG_MIN on overflow, so the range
            // checks below cover out-of-range input without consulting errno.
            long v = strtol(e, NULL, 10);
            if (v >= (long)INT_MAX) {
                cap = INT_MAX;
            } else if (v > 0) {
                cap = (int)v;
            }
        }
        cached = cap;
    }
    return cached;
}

CBMScope* cbm_scope_push(CBMArena* a, CBMScope* current);
CBMScope* cbm_scope_pop(CBMScope* scope);
void cbm_scope_bind(CBMScope* scope, const char* name, const CBMType* type);
Expand Down
99 changes: 99 additions & 0 deletions tests/test_stack_overflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,101 @@ TEST(lsp_ts_cyclic_types_no_crash) {
PASS();
}

/* ─── Deeply-nested calls drive the per-language LSP resolve walkers into
* per-nesting-level native recursion. Unguarded, these SIGSEGV and take down
* the whole index. Each walker now has a walk_depth cap (CBM_LSP_MAX_WALK_DEPTH,
* env-overridable) that skips the too-deep subtree — graceful degradation.
* These mirror lsp_java_deep_nesting_no_crash: same fixture shape, one per
* previously-unguarded walker (py_resolve_calls_in, resolve_calls_in_node[go],
* php_resolve_calls_in_node, kt_resolve_calls_in_node). RED proof: run the
* suite with CBM_LSP_MAX_WALK_DEPTH set huge (disabling only these caps) and
* each of these four SIGSEGVs — proving the guard, not the fixture, is what
* keeps them green. ─── */

TEST(lsp_python_deep_nesting_no_crash) {
    /* py_resolve_calls_in recurses per nesting level; see the Java analog.
     * Fixture: g() returns f(f(...f(1)...)) with DEPTH nested calls. */
    const int DEPTH = 30000;
    /* "f(" and ")" cost 3 bytes per level; 256 covers header, "1", tail, NUL. */
    size_t sz = (size_t)DEPTH * 3 + 256;
    char *src = malloc(sz);
    ASSERT_NOT_NULL(src);
    char *p = src;
    p += snprintf(p, sz, "def f(a):\n return a\ndef g():\n return ");
    for (int i = 0; i < DEPTH; i++) {
        *p++ = 'f';
        *p++ = '(';
    }
    *p++ = '1';
    memset(p, ')', DEPTH);
    p += DEPTH;
    snprintf(p, sz - (size_t)(p - src), "\n");
    /* Capture the result and free first, so the buffer is released even if
     * the assertion macro leaves the test early on failure. */
    int crashed = so_extract_crashes(src, CBM_LANG_PYTHON, "deep.py");
    free(src);
    ASSERT_FALSE(crashed);
    PASS();
}

TEST(lsp_go_deep_nesting_no_crash) {
    /* resolve_calls_in_node recurses per nesting level; see the Java analog.
     * Fixture: g() returns f(f(...f(1)...)) with DEPTH nested calls. */
    const int DEPTH = 30000;
    /* "f(" and ")" cost 3 bytes per level; 256 covers header, "1", tail, NUL. */
    size_t sz = (size_t)DEPTH * 3 + 256;
    char *src = malloc(sz);
    ASSERT_NOT_NULL(src);
    char *p = src;
    p += snprintf(p, sz, "package p\nfunc f(a int) int { return a }\nfunc g() int { return ");
    for (int i = 0; i < DEPTH; i++) {
        *p++ = 'f';
        *p++ = '(';
    }
    *p++ = '1';
    memset(p, ')', DEPTH);
    p += DEPTH;
    snprintf(p, sz - (size_t)(p - src), " }\n");
    /* Capture the result and free first, so the buffer is released even if
     * the assertion macro leaves the test early on failure. */
    int crashed = so_extract_crashes(src, CBM_LANG_GO, "deep.go");
    free(src);
    ASSERT_FALSE(crashed);
    PASS();
}

TEST(lsp_php_deep_nesting_no_crash) {
    /* php_resolve_calls_in_node recurses per nesting level; Java analog.
     * Fixture: g() returns f(f(...f(1)...)) with DEPTH nested calls. */
    const int DEPTH = 30000;
    /* "f(" and ")" cost 3 bytes per level; 256 covers header, "1", tail, NUL. */
    size_t sz = (size_t)DEPTH * 3 + 256;
    char *src = malloc(sz);
    ASSERT_NOT_NULL(src);
    char *p = src;
    p += snprintf(p, sz, "<?php\nfunction f($a) { return $a; }\nfunction g() { return ");
    for (int i = 0; i < DEPTH; i++) {
        *p++ = 'f';
        *p++ = '(';
    }
    *p++ = '1';
    memset(p, ')', DEPTH);
    p += DEPTH;
    snprintf(p, sz - (size_t)(p - src), "; }\n");
    /* Capture the result and free first, so the buffer is released even if
     * the assertion macro leaves the test early on failure. */
    int crashed = so_extract_crashes(src, CBM_LANG_PHP, "deep.php");
    free(src);
    ASSERT_FALSE(crashed);
    PASS();
}

TEST(lsp_kotlin_deep_nesting_no_crash) {
    /* kt_resolve_calls_in_node recurses per nesting level; Java analog.
     * Fixture: g() returns f(f(...f(1)...)) with DEPTH nested calls. */
    const int DEPTH = 30000;
    /* "f(" and ")" cost 3 bytes per level; 256 covers header, "1", tail, NUL. */
    size_t sz = (size_t)DEPTH * 3 + 256;
    char *src = malloc(sz);
    ASSERT_NOT_NULL(src);
    char *p = src;
    p += snprintf(p, sz, "fun f(a: Int): Int { return a }\nfun g(): Int { return ");
    for (int i = 0; i < DEPTH; i++) {
        *p++ = 'f';
        *p++ = '(';
    }
    *p++ = '1';
    memset(p, ')', DEPTH);
    p += DEPTH;
    snprintf(p, sz - (size_t)(p - src), " }\n");
    /* Capture the result and free first, so the buffer is released even if
     * the assertion macro leaves the test early on failure. */
    int crashed = so_extract_crashes(src, CBM_LANG_KOTLIN, "deep.kt");
    free(src);
    ASSERT_FALSE(crashed);
    PASS();
}

/* ═══════════════════════════════════════════════════════════════════
* Suite registration
* ═══════════════════════════════════════════════════════════════════ */
Expand All @@ -529,6 +624,10 @@ SUITE(stack_overflow) {
RUN_TEST(lsp_java_lambda_args_exceed_params_no_crash);
RUN_TEST(lsp_cpp_deep_expression_no_crash);
RUN_TEST(lsp_ts_cyclic_types_no_crash);
RUN_TEST(lsp_python_deep_nesting_no_crash);
RUN_TEST(lsp_go_deep_nesting_no_crash);
RUN_TEST(lsp_php_deep_nesting_no_crash);
RUN_TEST(lsp_kotlin_deep_nesting_no_crash);

RUN_TEST(js_calls_exceed_512);
RUN_TEST(python_calls_exceed_512);
Expand Down
Loading