Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 29 additions & 4 deletions src/graph_buffer/graph_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,31 @@ static void make_id_key(char *buf, size_t bufsz, int64_t id) {
snprintf(buf, bufsz, "%lld", (long long)id);
}

static void make_edge_key(char *buf, size_t bufsz, int64_t src, int64_t tgt, const char *type) {
/* Build the dedup key for an edge into buf (snprintf-bounded by bufsz).
 *
 * IMPORTS edges carry exactly one imported symbol's local_name (#768): two
 * named imports from the same specifier resolve to the same (source,
 * target) pair but are distinct symbols. Key on local_name too so the
 * second import doesn't dedup-collide with and overwrite the first --
 * every pass that walks IMPORTS edges (pass_calls.c, pass_usages.c,
 * pass_semantic.c, pass_lsp_cross.c) expects one local_name per edge, so
 * losing an edge here silently breaks cross-file call resolution for
 * whichever symbol got dropped, not just "who imports X" queries. Other
 * edge types keep the plain (source,target,type) key: collapsing repeat
 * edges of the same type between the same two nodes (e.g. multiple call
 * sites) into one is the existing, intended dedup behavior there.
 *
 * Parameters:
 *   buf, bufsz       destination buffer and its size in bytes
 *   src, tgt         source / target node ids
 *   type             edge type string; must not be NULL
 *   properties_json  edge properties as JSON text, or NULL
 *
 * Resulting key: "src:tgt:type" or, for IMPORTS edges whose properties
 * contain a local_name string, "src:tgt:type:<raw local_name>". The name is
 * copied as it appears in the JSON text (escape sequences are not decoded).
 *
 * NOTE(review): only the compact spelling `"local_name":"` (no whitespace
 * around the colon) is recognised; any other spelling falls back to the
 * plain key. Confirm that every producer emits the compact form.
 * NOTE(review): a name too long for buf is truncated by snprintf, so very
 * long names sharing a prefix could still collide. */
static void make_edge_key(char *buf, size_t bufsz, int64_t src, int64_t tgt, const char *type,
                          const char *properties_json) {
    if (properties_json && strcmp(type, "IMPORTS") == 0) {
        static const char local_name_key[] = "\"local_name\":\"";
        const char *ln = strstr(properties_json, local_name_key);
        if (ln) {
            ln += sizeof(local_name_key) - 1;

            /* Find the closing quote of the JSON string. A backslash escapes
             * the following character, so an escaped quote (\") belongs to
             * the value and must not end it; stopping there would give two
             * different names the same key. Unterminated input runs to the
             * end of the text. */
            const char *end = ln;
            while (*end != '\0' && *end != '"') {
                if (*end == '\\' && end[1] != '\0') {
                    end++; /* skip the escaped character as well */
                }
                end++;
            }

            /* The key can never hold more than bufsz bytes, so clamp before
             * narrowing to int for the %.*s precision; this does not change
             * what snprintf writes. */
            size_t ln_len = (size_t)(end - ln);
            if (ln_len > bufsz) {
                ln_len = bufsz;
            }
            snprintf(buf, bufsz, "%lld:%lld:%s:%.*s", (long long)src, (long long)tgt, type,
                     (int)ln_len, ln);
            return;
        }
    }
    snprintf(buf, bufsz, "%lld:%lld:%s", (long long)src, (long long)tgt, type);
}

Expand Down Expand Up @@ -244,7 +268,7 @@ static void remove_node_from_ptr_array(node_ptr_array_t *arr, int64_t node_id) {
static void unindex_edge(cbm_gbuf_t *gb, const cbm_gbuf_edge_t *e) {
char key[EDGE_KEY_BUF];

make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type);
make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type, e->properties_json);
const char *ekey = cbm_ht_get_key(gb->edge_by_key, key);
cbm_ht_delete(gb->edge_by_key, key);
free((void *)ekey);
Expand Down Expand Up @@ -919,7 +943,7 @@ int64_t cbm_gbuf_insert_edge(cbm_gbuf_t *gb, int64_t source_id, int64_t target_i

/* Check for dedup */
char key[EDGE_KEY_BUF];
make_edge_key(key, sizeof(key), source_id, target_id, type);
make_edge_key(key, sizeof(key), source_id, target_id, type, properties_json);

cbm_gbuf_edge_t *existing = cbm_ht_get(gb->edge_by_key, key);
if (existing) {
Expand Down Expand Up @@ -1032,7 +1056,8 @@ int cbm_gbuf_delete_edges_by_type(cbm_gbuf_t *gb, const char *type) {
cbm_gbuf_edge_t *e = gb->edges.items[i];
if (strcmp(e->type, type) == 0) {
char key[EDGE_KEY_BUF];
make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type);
make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type,
e->properties_json);
const char *ekey = cbm_ht_get_key(gb->edge_by_key, key);
cbm_ht_delete(gb->edge_by_key, key);
free((void *)ekey);
Expand Down
38 changes: 38 additions & 0 deletions tests/test_graph_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "test_framework.h"
#include "graph_buffer/graph_buffer.h"
#include "store/store.h"
#include <string.h>

/* ── Node operations ───────────────────────────────────────────── */

Expand Down Expand Up @@ -173,6 +174,42 @@ TEST(gbuf_edge_dedup) {
PASS();
}

/* Regression test for #768: two named imports from the same specifier (same
 * source file, same target file) must produce two distinct IMPORTS edges,
 * told apart by the local_name in their properties JSON -- not collapse into
 * one edge that silently drops whichever import lost the dedup race.
 * Re-inserting the SAME local_name (e.g. an idempotent re-index) must still
 * dedup to one edge and hand back the id of the edge already stored. */
TEST(gbuf_imports_multi_symbol_dedup) {
cbm_gbuf_t *gb = cbm_gbuf_new("test", "/tmp");
/* Two File nodes: the importing file and the file it imports from. */
int64_t consumer = cbm_gbuf_upsert_node(gb, "File", "consumer.ts", "pkg.consumer", "consumer.ts",
1, 1, "{}");
int64_t lib = cbm_gbuf_upsert_node(gb, "File", "lib.ts", "pkg.lib", "lib.ts", 1, 1, "{}");

/* Same (source, target, type) triple; only local_name differs. */
int64_t eid_a = cbm_gbuf_insert_edge(gb, consumer, lib, "IMPORTS", "{\"local_name\":\"A\"}");
int64_t eid_b = cbm_gbuf_insert_edge(gb, consumer, lib, "IMPORTS", "{\"local_name\":\"B\"}");
ASSERT_GT(eid_a, 0);
ASSERT_GT(eid_b, 0);
ASSERT_NEQ(eid_a, eid_b); /* distinct symbols -> distinct edges */
ASSERT_EQ(cbm_gbuf_edge_count(gb), 2);

/* Both edges must be reachable through the source+type lookup, and each
 * symbol must be present on one of them. The checks accept either position
 * in the result array, so no ordering of the results is assumed. */
const cbm_gbuf_edge_t **edges = NULL;
int count = 0;
cbm_gbuf_find_edges_by_source_type(gb, consumer, "IMPORTS", &edges, &count);
ASSERT_EQ(count, 2);
ASSERT_TRUE(strstr(edges[0]->properties_json, "\"local_name\":\"A\"") != NULL ||
strstr(edges[1]->properties_json, "\"local_name\":\"A\"") != NULL);
ASSERT_TRUE(strstr(edges[0]->properties_json, "\"local_name\":\"B\"") != NULL ||
strstr(edges[1]->properties_json, "\"local_name\":\"B\"") != NULL);

/* Re-inserting the same symbol (idempotent re-index) still dedups: the
 * original edge id comes back and the edge count is unchanged. */
int64_t eid_a_again = cbm_gbuf_insert_edge(gb, consumer, lib, "IMPORTS", "{\"local_name\":\"A\"}");
ASSERT_EQ(eid_a_again, eid_a);
ASSERT_EQ(cbm_gbuf_edge_count(gb), 2);

cbm_gbuf_free(gb);
PASS();
}

TEST(gbuf_find_edges_by_source_type) {
cbm_gbuf_t *gb = cbm_gbuf_new("test", "/tmp");
int64_t a = cbm_gbuf_upsert_node(gb, "Function", "a", "pkg.a", "f.go", 1, 5, "{}");
Expand Down Expand Up @@ -941,6 +978,7 @@ SUITE(graph_buffer) {
RUN_TEST(gbuf_delete_by_label);
RUN_TEST(gbuf_insert_edge);
RUN_TEST(gbuf_edge_dedup);
RUN_TEST(gbuf_imports_multi_symbol_dedup);
RUN_TEST(gbuf_find_edges_by_source_type);
RUN_TEST(gbuf_find_edges_by_target_type);
RUN_TEST(gbuf_find_edges_by_type);
Expand Down
Loading