diff --git a/internal/cbm/lsp/go_lsp.c b/internal/cbm/lsp/go_lsp.c index b104a9b83..7162eb27e 100644 --- a/internal/cbm/lsp/go_lsp.c +++ b/internal/cbm/lsp/go_lsp.c @@ -6,7 +6,22 @@ #include // Forward declarations -static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node); +static void resolve_calls_in_node_inner(GoLSPContext* ctx, TSNode node); + +/* Depth-guarded entry for the AST call-resolution walk. The walk recurses once + * per nesting level; a deeply-nested or cyclic file can overflow the native + * stack (SIGSEGV) and take down the whole index. Past the cap the subtree is + * skipped — its calls stay unresolved, which is graceful degradation, not a + * crash. The cap is CBM_LSP_MAX_WALK_DEPTH, env-overridable via the same name. + * The walk_depth-- runs after the inner returns, so early returns in the body + * never leak the counter. */ +static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node) { + if (ctx->walk_depth >= cbm_lsp_max_walk_depth()) + return; + ctx->walk_depth++; + resolve_calls_in_node_inner(ctx, node); + ctx->walk_depth--; +} static void emit_resolved_call(GoLSPContext* ctx, const char* callee_qn, const char* strategy, float confidence); static const CBMType* go_lookup_field(GoLSPContext* ctx, const char* type_qn, const char* field_name, int depth); static void extract_type_params_from_ast(CBMArena* arena, CBMTypeRegistry* reg, @@ -1108,7 +1123,7 @@ static void emit_unresolved_call(GoLSPContext* ctx, const char* expr_text, const // --- Walk call expressions and resolve them --- -static void resolve_calls_in_node(GoLSPContext* ctx, TSNode node) { +static void resolve_calls_in_node_inner(GoLSPContext* ctx, TSNode node) { if (ts_node_is_null(node)) return; const char* kind = ts_node_type(node); diff --git a/internal/cbm/lsp/go_lsp.h b/internal/cbm/lsp/go_lsp.h index e13f8f52d..1bf96c86a 100644 --- a/internal/cbm/lsp/go_lsp.h +++ b/internal/cbm/lsp/go_lsp.h @@ -26,6 +26,10 @@ typedef struct { // Output: resolved calls 
accumulate here CBMResolvedCallArray* resolved_calls; + // AST-walk recursion depth for resolve_calls_in_node (guards stack overflow + // on deeply-nested/cyclic files; see cbm_lsp_max_walk_depth). Zero via memset. + int walk_depth; + // Debug mode (CBM_LSP_DEBUG env) bool debug; } GoLSPContext; diff --git a/internal/cbm/lsp/kotlin_lsp.c b/internal/cbm/lsp/kotlin_lsp.c index 7a3d843a6..55811f07b 100644 --- a/internal/cbm/lsp/kotlin_lsp.c +++ b/internal/cbm/lsp/kotlin_lsp.c @@ -62,7 +62,22 @@ /* ── forward declarations ─────────────────────────────────────────── */ -static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node); +static void kt_resolve_calls_in_node_inner(KotlinLSPContext *ctx, TSNode node); + +/* Depth-guarded entry for the AST call-resolution walk. The walk recurses once + * per nesting level; a deeply-nested or cyclic file can overflow the native + * stack (SIGSEGV) and take down the whole index. Past the cap the subtree is + * skipped — its calls stay unresolved, which is graceful degradation, not a + * crash. The cap is CBM_LSP_MAX_WALK_DEPTH, env-overridable via the same name. + * The walk_depth-- runs after the inner returns, so early returns in the body + * never leak the counter. */ +static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node) { + if (ctx->walk_depth >= cbm_lsp_max_walk_depth()) + return; + ctx->walk_depth++; + kt_resolve_calls_in_node_inner(ctx, node); + ctx->walk_depth--; +} static void kt_process_class_decl(KotlinLSPContext *ctx, TSNode node); static void kt_process_object_decl(KotlinLSPContext *ctx, TSNode node, bool is_companion, const char *outer_class_qn); @@ -3174,7 +3189,7 @@ static void kt_process_statement(KotlinLSPContext *ctx, TSNode stmt) { /* Generic walker that fires call resolution on every call_expression in * a subtree, *without* descending into nested function/class bodies (those * are processed separately with their own scope). 
*/ -static void kt_resolve_calls_in_node(KotlinLSPContext *ctx, TSNode node) { +static void kt_resolve_calls_in_node_inner(KotlinLSPContext *ctx, TSNode node) { if (ts_node_is_null(node)) { return; } diff --git a/internal/cbm/lsp/kotlin_lsp.h b/internal/cbm/lsp/kotlin_lsp.h index 2a1ec79a2..f6471bebd 100644 --- a/internal/cbm/lsp/kotlin_lsp.h +++ b/internal/cbm/lsp/kotlin_lsp.h @@ -110,6 +110,11 @@ typedef struct KotlinLSPContext { /* Recursion guard for kotlin_eval_expr_type. */ int eval_depth; + /* AST-walk recursion depth for kt_resolve_calls_in_node (guards stack + * overflow on deeply-nested/cyclic files; see cbm_lsp_max_walk_depth). + * Zero via memset. */ + int walk_depth; + /* Debug mode (CBM_LSP_DEBUG env). */ bool debug; } KotlinLSPContext; diff --git a/internal/cbm/lsp/php_lsp.c b/internal/cbm/lsp/php_lsp.c index b264b99e9..0fa2c0252 100644 --- a/internal/cbm/lsp/php_lsp.c +++ b/internal/cbm/lsp/php_lsp.c @@ -32,7 +32,22 @@ extern const TSLanguage *tree_sitter_php_only(void); /* Forward decls */ -static void php_resolve_calls_in_node(PHPLSPContext *ctx, TSNode node); +static void php_resolve_calls_in_node_inner(PHPLSPContext *ctx, TSNode node); + +/* Depth-guarded entry for the AST call-resolution walk. The walk recurses once + * per nesting level; a deeply-nested or cyclic file can overflow the native + * stack (SIGSEGV) and take down the whole index. Past the cap the subtree is + * skipped — its calls stay unresolved, which is graceful degradation, not a + * crash. The cap is CBM_LSP_MAX_WALK_DEPTH, env-overridable via the same name. + * The walk_depth-- runs after the inner returns, so early returns in the body + * never leak the counter. 
static void php_resolve_calls_in_node_inner(PHPLSPContext *ctx, TSNode node);

/* php_resolve_calls_in_node -- depth-limited front door to the PHP
 * call-resolution walk.
 *
 * ctx->walk_depth counts how many invocations of this wrapper are currently
 * active on the stack. While that count is below cbm_lsp_max_walk_depth() the
 * node is handed to php_resolve_calls_in_node_inner(); once the cap is reached
 * the node is not visited at all, so calls beneath it stay unresolved instead
 * of pushing the native stack further.
 *
 * The counter is restored only after the inner walker has returned, so it
 * stays balanced no matter which return path the inner walker takes.
 */
static void php_resolve_calls_in_node(PHPLSPContext *ctx, TSNode node) {
    const int depth_cap = cbm_lsp_max_walk_depth();
    if (ctx->walk_depth < depth_cap) {
        ctx->walk_depth += 1;
        php_resolve_calls_in_node_inner(ctx, node);
        ctx->walk_depth -= 1;
    }
}
Past the cap the subtree is + * skipped — its calls stay unresolved, which is graceful degradation, not a + * crash. The cap is CBM_LSP_MAX_WALK_DEPTH, env-overridable via the same name. + * The walk_depth-- runs after the inner returns, so early returns in the body + * never leak the counter. */ +static void py_resolve_calls_in(PyLSPContext *ctx, TSNode node) { + if (ctx->walk_depth >= cbm_lsp_max_walk_depth()) + return; + ctx->walk_depth++; + py_resolve_calls_in_inner(ctx, node); + ctx->walk_depth--; +} static const CBMType *py_eval_expr_type(PyLSPContext *ctx, TSNode node); static void py_process_statement(PyLSPContext *ctx, TSNode node); static const CBMRegisteredFunc *py_lookup_attribute(PyLSPContext *ctx, const char *type_qn, @@ -2199,7 +2214,7 @@ static void py_emit_dunder_call(PyLSPContext *ctx, const CBMType *recv, const ch } } -static void py_resolve_calls_in(PyLSPContext *ctx, TSNode node) { +static void py_resolve_calls_in_inner(PyLSPContext *ctx, TSNode node) { if (!ctx || ts_node_is_null(node)) return; const char *k = ts_node_type(node); diff --git a/internal/cbm/lsp/py_lsp.h b/internal/cbm/lsp/py_lsp.h index 57553fd3b..2efbab60f 100644 --- a/internal/cbm/lsp/py_lsp.h +++ b/internal/cbm/lsp/py_lsp.h @@ -69,6 +69,10 @@ typedef struct { int dict_literal_count; int dict_literal_cap; + // AST-walk recursion depth for py_resolve_calls_in (guards stack overflow on + // deeply-nested/cyclic files; see cbm_lsp_max_walk_depth). Zero via memset. + int walk_depth; + // Debug mode (CBM_LSP_DEBUG env, shared across all language LSPs). 
#include <limits.h> /* INT_MAX (cbm_lsp_max_walk_depth) */
#include <stdlib.h> /* getenv, strtol (cbm_lsp_max_walk_depth) */

// Default recursion cap for the per-language "resolve calls in AST node"
// walkers. Each walker goes through a wrapper that compares its context's
// walk_depth against the resolved cap and skips the subtree once the cap is
// reached, leaving those calls unresolved instead of overflowing the native
// stack. Override at run time with the CBM_LSP_MAX_WALK_DEPTH environment
// variable (positive decimal integer).
#define CBM_LSP_MAX_WALK_DEPTH 512

// Returns the effective walk-depth cap.
//
// Resolution order:
//   1. CBM_LSP_MAX_WALK_DEPTH from the environment, when it starts with a
//      positive decimal number (trailing characters are ignored, matching the
//      previous atoi-based behavior);
//   2. otherwise the compile-time default CBM_LSP_MAX_WALK_DEPTH.
//
// Values too large for int saturate at INT_MAX rather than being converted
// with atoi, whose behavior is undefined when the result is out of range, so
// "set it huge to disable the cap" works as intended.
//
// The result is computed on the first call and cached in a function-local
// static, so later calls never touch getenv. Because this is a static inline
// function, every translation unit that includes this header keeps its own
// cache. The cache is not synchronized: concurrent first calls each compute
// and store the same value.
static inline int cbm_lsp_max_walk_depth(void) {
    static int cached = -1;
    if (cached < 0) {
        int cap = CBM_LSP_MAX_WALK_DEPTH;
        const char *env = getenv("CBM_LSP_MAX_WALK_DEPTH");
        if (env && *env) {
            char *end = NULL;
            /* strtol saturates at LONG_MAX on overflow, so a plain range
             * check is enough; errno is not needed. */
            long parsed = strtol(env, &end, 10);
            if (end != env && parsed > 0) {
                cap = (parsed > (long)INT_MAX) ? INT_MAX : (int)parsed;
            }
        }
        cached = cap;
    }
    return cached;
}
*/ + const int DEPTH = 30000; + size_t sz = (size_t)DEPTH * 3 + 256; + char *src = malloc(sz); + ASSERT_NOT_NULL(src); + char *p = src; + p += snprintf(p, sz, "package p\nfunc f(a int) int { return a }\nfunc g() int { return "); + for (int i = 0; i < DEPTH; i++) { + *p++ = 'f'; + *p++ = '('; + } + *p++ = '1'; + memset(p, ')', DEPTH); + p += DEPTH; + snprintf(p, sz - (size_t)(p - src), " }\n"); + ASSERT_FALSE(so_extract_crashes(src, CBM_LANG_GO, "deep.go")); + free(src); + PASS(); +} + +TEST(lsp_php_deep_nesting_no_crash) { + /* php_resolve_calls_in_node recurses per nesting level; Java analog. */ + const int DEPTH = 30000; + size_t sz = (size_t)DEPTH * 3 + 256; + char *src = malloc(sz); + ASSERT_NOT_NULL(src); + char *p = src; + p += snprintf(p, sz, "