diff --git a/.clippy.toml b/.clippy.toml index ab238e7ed85..e953d3e189e 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -3,5 +3,5 @@ allow-indexing-slicing-in-tests = true allow-print-in-tests = true allow-renamed-params-for = ["core::fmt::Debug", "core::fmt::Display", "core::fmt::LowerHex", "core::fmt::UpperHex", "core::fmt::Pointer", "futures_sink::Sink", "serde::de::Visitor", ".."] avoid-breaking-exported-api = false -doc-valid-idents = ["BlockProtocol", "HaRPC", "HashQL", "OpenAPI", "PostgreSQL", "OAuth2", ".."] +doc-valid-idents = ["BlockProtocol", "HaRPC", "HashQL", "OpenAI", "OpenAPI", "PostgreSQL", "OAuth2", ".."] suppress-restriction-lint-in-const = true diff --git a/AGENTS.md b/AGENTS.md index f1cfd038e76..2575a5db8ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -81,6 +81,16 @@ cargo clippy --all-features --package For Rust packages, you can add features as needed with `--all-features`, specific features like `--features=foo,bar`, or use `cargo-hack` with `--feature-powerset` for comprehensive feature testing. +### Monorepo wiring for Rust crates + +Each Rust crate has a `package.json` whose **identity and workspace-dependency wiring** — its `@rust/` name, version, and the `dependencies` mirroring its `Cargo.toml` — is generated from `Cargo.toml`. After **adding, removing, or renaming a Rust crate**, or changing its `Cargo.toml` dependencies, re-sync that wiring: + +```bash +mise run sync:turborepo # sync package.json identity + deps from Cargo.toml metadata +``` + +`sync:turborepo` only manages that generated wiring — the `scripts` section is hand-maintained and is used by CI and Turborepo (e.g. `test:unit`, `lint:clippy`, `doc:dependency-diagram`), so add or edit scripts by hand. The task wraps the `repo-chores` CLI; the equivalent direct invocation is `cargo run --package hash-repo-chores --bin repo-chores-cli -- sync-turborepo`. A related task, `mise run fix:package-json`, sorts `package.json` keys consistently. 
+ ## Documentation Maintenance ### Petrinaut user-facing docs diff --git a/Cargo.lock b/Cargo.lock index 9657b71a390..1062aca64c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -201,7 +201,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -225,7 +225,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -1848,7 +1848,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" dependencies = [ "digest 0.10.7", - "spin 0.10.0", + "spin", ] [[package]] @@ -2213,7 +2213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" dependencies = [ "data-encoding", - "syn 2.0.118", + "syn 1.0.109", ] [[package]] @@ -2662,7 +2662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -2684,7 +2684,7 @@ dependencies = [ "rustc_version", "serde", "serde_core", - "spin 0.12.1", + "spin", "supports-color", "supports-unicode", "thiserror 2.0.18", @@ -3518,6 +3518,7 @@ dependencies = [ "hash-codec", "hash-graph-api", "hash-graph-authorization", + "hash-graph-embeddings", "hash-graph-postgres-store", "hash-graph-store", "hash-graph-type-fetcher", @@ -3561,6 +3562,7 @@ dependencies = [ "harpc-types", "hash-codec", "hash-graph-authorization", + "hash-graph-embeddings", "hash-graph-postgres-store", "hash-graph-store", "hash-graph-temporal-versioning", @@ -3670,6 +3672,22 @@ dependencies = [ 
"walkdir", ] +[[package]] +name = "hash-graph-embeddings" +version = "0.0.0" +dependencies = [ + "derive_more", + "error-stack", + "hash-graph-types", + "reqwest", + "reqwest-middleware", + "reqwest-retry", + "reqwest-tracing", + "serde", + "simple-mermaid", + "tracing", +] + [[package]] name = "hash-graph-integration" version = "0.0.0" @@ -4968,7 +4986,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -6298,7 +6316,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -8318,10 +8336,31 @@ dependencies = [ "async-trait", "http 1.4.2", "reqwest", + "serde", "thiserror 2.0.18", "tower-service", ] +[[package]] +name = "reqwest-retry" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe2412db2af7d2268e7a5406be0431f37d9eb67ff390f35b395716f5f06c2eaa" +dependencies = [ + "anyhow", + "async-trait", + "futures", + "getrandom 0.2.17", + "http 1.4.2", + "hyper", + "reqwest", + "reqwest-middleware", + "retry-policies", + "thiserror 2.0.18", + "tokio", + "wasmtimer", +] + [[package]] name = "reqwest-tracing" version = "0.7.1" @@ -8344,6 +8383,15 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" +[[package]] +name = "retry-policies" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc05fbf560421a0357a750cbe78c7ca19d4923918490daabba313d5dbc871e47" +dependencies = [ + "rand 0.10.1", +] + [[package]] name = "rgb" version = "0.8.53" @@ -8494,7 +8542,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.60.2", 
] [[package]] @@ -8553,7 +8601,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9236,7 +9284,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9275,12 +9323,6 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" -[[package]] -name = "spin" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd5231412d905519dca6a5deb0327d407be68d6c941feec004533401d3a0a715" - [[package]] name = "spki" version = "0.7.3" @@ -9371,7 +9413,7 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9659,10 +9701,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.3", + "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9728,7 +9770,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8c27177b12a6399ffc08b98f76f7c9a1f4fe9fc967c784c5a071fa8d93cf7e1" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9750,7 +9792,7 @@ dependencies = [ "parking_lot", "rustix", "signal-hook", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -9760,7 +9802,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -11110,6 +11152,20 @@ 
dependencies = [ "web-sys", ] +[[package]] +name = "wasmtimer" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b" +dependencies = [ + "futures", + "js-sys", + "parking_lot", + "pin-utils", + "slab", + "wasm-bindgen", +] + [[package]] name = "web-sys" version = "0.3.85" @@ -11261,7 +11317,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 42b78fb5027..f4d329e7189 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "libs/@local/effect-dns/hickory", "libs/@local/graph/api", "libs/@local/graph/authorization", + "libs/@local/graph/embeddings", "libs/@local/graph/migrations", "libs/@local/graph/migrations-macros", "libs/@local/graph/postgres-store", @@ -75,6 +76,7 @@ hash-codec.path = "libs/@local/codec" hash-codegen.path = "libs/@local/codegen" hash-graph-api.path = "libs/@local/graph/api" hash-graph-authorization.path = "libs/@local/graph/authorization" +hash-graph-embeddings.path = "libs/@local/graph/embeddings" hash-graph-migrations.path = "libs/@local/graph/migrations" hash-graph-migrations-macros.path = "libs/@local/graph/migrations-macros" hash-graph-postgres-store.path = "libs/@local/graph/postgres-store" @@ -228,6 +230,7 @@ refinery = { version = "0.8.16", default-features = fa regex = { version = "1.11.2", default-features = false, features = ["perf", "unicode"] } reqwest = { version = "0.13.0", default-features = false, features = ["json", "rustls"] } reqwest-middleware = { version = "0.5.0", default-features = false } +reqwest-retry = { version = "0.9.1", default-features = false } reqwest-tracing = { version = "0.7.0", default-features = false } roaring = { version = "0.11.2", default-features = 
false } rpds = { version = "1.1.2", default-features = false } diff --git a/apps/hash-graph/Cargo.toml b/apps/hash-graph/Cargo.toml index 0148951d74d..0dcb10e4746 100644 --- a/apps/hash-graph/Cargo.toml +++ b/apps/hash-graph/Cargo.toml @@ -18,6 +18,7 @@ harpc-server = { workspace = true } hash-codec = { workspace = true } hash-graph-api = { workspace = true, features = ["clap"] } hash-graph-authorization = { workspace = true } +hash-graph-embeddings = { workspace = true } hash-graph-postgres-store = { workspace = true, features = ["clap"] } hash-graph-store = { workspace = true } hash-graph-type-fetcher = { workspace = true } diff --git a/apps/hash-graph/docs/dependency-diagram.mmd b/apps/hash-graph/docs/dependency-diagram.mmd index 8814115ea5f..2c772cf559f 100644 --- a/apps/hash-graph/docs/dependency-diagram.mmd +++ b/apps/hash-graph/docs/dependency-diagram.mmd @@ -15,95 +15,98 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-migrations] - 7[hash-graph-migrations-macros] - 8[hash-graph-postgres-store] - 9[hash-graph-store] - 10[hash-graph-temporal-versioning] - 11[hash-graph-type-defs] - 12[hash-graph-type-fetcher] - 13[hash-graph-types] - 14[hash-graph-validation] - 15[harpc-client] - 16[harpc-codec] - 17[harpc-net] - 18[harpc-server] - 19[harpc-system] - 20[harpc-tower] - 21[harpc-types] - 22[harpc-wire-protocol] - 23[hashql-ast] - 24[hashql-compiletest] - 25[hashql-core] - 26[hashql-diagnostics] - 27[hashql-eval] - 28[hashql-hir] - 29[hashql-macros] - 30[hashql-mir] - 31[hashql-syntax-jexpr] - 32[hash-status] - 33[hash-telemetry] - 34[hash-temporal-client] - 35[darwin-kperf] - 36[darwin-kperf-criterion] - 37[darwin-kperf-events] - 38[darwin-kperf-sys] - 39[error-stack] - 40[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-migrations] + 8[hash-graph-migrations-macros] + 9[hash-graph-postgres-store] + 10[hash-graph-store] + 11[hash-graph-temporal-versioning] + 12[hash-graph-type-defs] + 
13[hash-graph-type-fetcher] + 14[hash-graph-types] + 15[hash-graph-validation] + 16[harpc-client] + 17[harpc-codec] + 18[harpc-net] + 19[harpc-server] + 20[harpc-system] + 21[harpc-tower] + 22[harpc-types] + 23[harpc-wire-protocol] + 24[hashql-ast] + 25[hashql-compiletest] + 26[hashql-core] + 27[hashql-diagnostics] + 28[hashql-eval] + 29[hashql-hir] + 30[hashql-macros] + 31[hashql-mir] + 32[hashql-syntax-jexpr] + 33[hash-status] + 34[hash-telemetry] + 35[hash-temporal-client] + 36[darwin-kperf] + 37[darwin-kperf-criterion] + 38[darwin-kperf-events] + 39[darwin-kperf-sys] + 40[error-stack] + 41[hash-graph-test-data] 0 --> 4 - 1 --> 10 - 1 -.-> 40 + 1 --> 11 + 1 -.-> 41 2 -.-> 3 - 2 --> 22 - 4 --> 11 + 2 --> 23 + 4 --> 6 4 --> 12 - 4 --> 18 - 4 --> 27 - 4 --> 31 + 4 --> 13 + 4 --> 19 + 4 --> 28 + 4 --> 32 5 --> 1 - 6 --> 7 - 6 --> 33 - 8 -.-> 6 - 8 --> 14 - 8 --> 32 - 9 --> 5 - 9 --> 13 - 9 --> 34 - 10 --> 2 - 11 --> 32 - 12 --> 9 - 13 -.-> 40 - 14 -.-> 40 - 15 --> 19 - 16 --> 21 - 16 --> 39 - 17 --> 2 - 17 -.-> 16 - 17 --> 16 - 18 -.-> 1 - 18 -.-> 15 - 19 --> 20 - 20 -.-> 17 - 20 --> 17 - 22 -.-> 21 - 22 --> 21 - 22 --> 39 - 23 -.-> 24 - 24 --> 27 - 24 --> 31 - 25 --> 2 - 25 --> 26 - 25 --> 29 - 25 -.-> 36 - 27 --> 8 - 27 --> 30 - 28 -.-> 24 - 30 --> 28 - 31 --> 23 - 31 --> 25 - 33 --> 39 - 34 --> 1 - 35 --> 37 - 35 --> 38 - 36 --> 35 - 40 --> 9 + 6 --> 14 + 7 --> 8 + 7 --> 34 + 9 -.-> 7 + 9 --> 15 + 9 --> 33 + 10 --> 5 + 10 --> 14 + 10 --> 35 + 11 --> 2 + 12 --> 33 + 13 --> 10 + 14 -.-> 41 + 15 -.-> 41 + 16 --> 20 + 17 --> 22 + 17 --> 40 + 18 --> 2 + 18 -.-> 17 + 18 --> 17 + 19 -.-> 1 + 19 -.-> 16 + 20 --> 21 + 21 -.-> 18 + 21 --> 18 + 23 -.-> 22 + 23 --> 22 + 23 --> 40 + 24 -.-> 25 + 25 --> 28 + 25 --> 32 + 26 --> 2 + 26 --> 27 + 26 --> 30 + 26 -.-> 37 + 28 --> 9 + 28 --> 31 + 29 -.-> 25 + 31 --> 29 + 32 --> 24 + 32 --> 26 + 34 --> 40 + 35 --> 1 + 36 --> 38 + 36 --> 39 + 37 --> 36 + 41 --> 10 diff --git a/apps/hash-graph/package.json b/apps/hash-graph/package.json 
index 086ba3ea46a..e9d86b09a51 100644 --- a/apps/hash-graph/package.json +++ b/apps/hash-graph/package.json @@ -27,6 +27,7 @@ "@rust/hash-codec": "workspace:*", "@rust/hash-graph-api": "workspace:*", "@rust/hash-graph-authorization": "workspace:*", + "@rust/hash-graph-embeddings": "workspace:*", "@rust/hash-graph-postgres-store": "workspace:*", "@rust/hash-graph-store": "workspace:*", "@rust/hash-graph-type-fetcher": "workspace:*", diff --git a/apps/hash-graph/src/subcommand/server.rs b/apps/hash-graph/src/subcommand/server.rs index 8c2dbeb27ab..081d0b01bc7 100644 --- a/apps/hash-graph/src/subcommand/server.rs +++ b/apps/hash-graph/src/subcommand/server.rs @@ -18,6 +18,7 @@ use hash_graph_api::{ rpc::Dependencies, }; use hash_graph_authorization::policies::store::{PolicyStore, PrincipalStore}; +use hash_graph_embeddings::{OpenAiEmbeddingClient, OpenAiEmbeddingClientConfig}; use hash_graph_postgres_store::store::{ DatabaseConnectionInfo, DatabasePoolConfig, PostgresStorePool, PostgresStoreSettings, }; @@ -126,6 +127,13 @@ pub struct ServerConfig { #[clap(flatten)] pub temporal: TemporalConfig, + /// The OpenAI API key used to generate embeddings for semantic search queries. + /// + /// If not set, the entity and entity-type search endpoints cannot resolve a `semanticString`; + /// callers must provide a precomputed `embedding` instead. + #[clap(long, env = "HASH_GRAPH_OPENAI_API_KEY")] + pub openai_api_key: Option, + /// A regex which *new* Type System URLs are checked against. Trying to create new Types with /// a domain that doesn't satisfy the pattern will error. /// @@ -246,6 +254,33 @@ async fn create_temporal_client( } } +fn create_embedding_client( + config: &ServerConfig, +) -> Result, Report> { + // Treat an unset *or empty* key as "not configured", so an empty environment variable (a + // common deployment footgun) cleanly disables semantic-string search instead of building a + // client that fails every request with an authentication error. 
+ let Some(api_key) = config + .openai_api_key + .as_ref() + .filter(|api_key| !api_key.trim().is_empty()) + else { + tracing::warn!( + "`HASH_GRAPH_OPENAI_API_KEY` is not set; semantic-string search is disabled. Search \ + requests must supply a precomputed embedding." + ); + return Ok(None); + }; + + let client = OpenAiEmbeddingClient::new(OpenAiEmbeddingClientConfig { + api_key: api_key.trim().to_owned(), + base_url: None, + }) + .change_context(GraphError)?; + tracing::info!("Semantic-string search is enabled via the OpenAI embedding client."); + Ok(Some(client)) +} + fn start_rest_server(router: axum::Router, address: HttpAddress, lifecycle: &ServerLifecycle) { let shutdown = lifecycle.shutdown.clone(); lifecycle.spawn("REST server", async move { @@ -326,6 +361,7 @@ where let temporal_client = create_temporal_client(&config.temporal) .await? .map(Arc::new); + let embedding_client = create_embedding_client(&config)?.map(Arc::new); if config.rpc_enabled { tracing::info!("Starting RPC server..."); @@ -345,6 +381,7 @@ where store, domain_regex: DomainValidator::new(config.allowed_url_domain), temporal_client, + embedding_client, query_logger, api_config: config.api_config, }); diff --git a/infra/compose/compose.yml b/infra/compose/compose.yml index da1d660f1f2..b92b5a6b40c 100644 --- a/infra/compose/compose.yml +++ b/infra/compose/compose.yml @@ -620,6 +620,7 @@ services: HASH_GRAPH_SENTRY_DSN: "${HASH_GRAPH_SENTRY_DSN-}" HASH_TEMPORAL_SERVER_HOST: "http://temporal" HASH_TEMPORAL_SERVER_PORT: "7233" + HASH_GRAPH_OPENAI_API_KEY: "${OPENAI_API_KEY-}" HASH_OTLP_ENDPOINT: "http://otel-collector:4317" RUST_BACKTRACE: 0 ports: diff --git a/libs/@blockprotocol/type-system/rust/docs/dependency-diagram.mmd b/libs/@blockprotocol/type-system/rust/docs/dependency-diagram.mmd index b9a27b2f1e8..d97925ceaff 100644 --- a/libs/@blockprotocol/type-system/rust/docs/dependency-diagram.mmd +++ b/libs/@blockprotocol/type-system/rust/docs/dependency-diagram.mmd @@ -15,58 +15,61 @@ 
graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/libs/@local/codec/docs/dependency-diagram.mmd b/libs/@local/codec/docs/dependency-diagram.mmd index e40e05fce7a..2dc72bb1081 100644 --- 
a/libs/@local/codec/docs/dependency-diagram.mmd +++ b/libs/@local/codec/docs/dependency-diagram.mmd @@ -15,70 +15,73 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-client] - 13[harpc-net] - 14[harpc-server] - 15[harpc-system] - 16[harpc-tower] - 17[harpc-types] - 18[harpc-wire-protocol] - 19[hashql-ast] - 20[hashql-compiletest] - 21[hashql-core] - 22[hashql-eval] - 23[hashql-hir] - 24[hashql-mir] - 25[hashql-syntax-jexpr] - 26[hash-temporal-client] - 27[error-stack] - 28[hash-graph-benches] - 29[hash-graph-integration] - 30[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-client] + 14[harpc-net] + 15[harpc-server] + 16[harpc-system] + 17[harpc-tower] + 18[harpc-types] + 19[harpc-wire-protocol] + 20[hashql-ast] + 21[hashql-compiletest] + 22[hashql-core] + 23[hashql-eval] + 24[hashql-hir] + 25[hashql-mir] + 26[hashql-syntax-jexpr] + 27[hash-temporal-client] + 28[error-stack] + 29[hash-graph-benches] + 30[hash-graph-integration] + 31[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 30 + 1 --> 9 + 1 -.-> 31 2 -.-> 3 - 2 --> 18 - 4 --> 9 - 4 --> 14 - 4 --> 22 - 4 --> 25 + 2 --> 19 + 4 --> 6 + 4 --> 10 + 4 --> 15 + 4 --> 23 + 4 --> 26 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 26 - 8 --> 2 - 9 --> 7 - 10 -.-> 30 - 11 -.-> 30 - 12 --> 15 - 13 --> 2 - 14 -.-> 1 - 14 -.-> 12 - 15 --> 16 - 16 -.-> 13 - 16 --> 13 - 18 -.-> 17 - 18 --> 17 - 18 --> 27 - 19 -.-> 20 - 20 --> 22 - 20 --> 25 - 21 --> 2 - 22 --> 6 - 22 --> 24 - 23 -.-> 20 - 24 --> 23 - 25 --> 19 - 25 --> 21 - 26 --> 1 - 28 -.-> 4 - 29 -.-> 6 - 30 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 27 + 9 --> 2 + 10 --> 8 + 11 -.-> 31 + 12 
-.-> 31 + 13 --> 16 + 14 --> 2 + 15 -.-> 1 + 15 -.-> 13 + 16 --> 17 + 17 -.-> 14 + 17 --> 14 + 19 -.-> 18 + 19 --> 18 + 19 --> 28 + 20 -.-> 21 + 21 --> 23 + 21 --> 26 + 22 --> 2 + 23 --> 7 + 23 --> 25 + 24 -.-> 21 + 25 --> 24 + 26 --> 20 + 26 --> 22 + 27 --> 1 + 29 -.-> 4 + 30 -.-> 7 + 31 --> 8 diff --git a/libs/@local/codegen/docs/dependency-diagram.mmd b/libs/@local/codegen/docs/dependency-diagram.mmd index 601be9ff4ba..46e354a3c6d 100644 --- a/libs/@local/codegen/docs/dependency-diagram.mmd +++ b/libs/@local/codegen/docs/dependency-diagram.mmd @@ -15,63 +15,66 @@ graph TD class 3 root 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-client] - 13[harpc-net] - 14[harpc-server] - 15[harpc-system] - 16[harpc-tower] - 17[hashql-ast] - 18[hashql-compiletest] - 19[hashql-core] - 20[hashql-eval] - 21[hashql-hir] - 22[hashql-mir] - 23[hashql-syntax-jexpr] - 24[hash-temporal-client] - 25[hash-graph-benches] - 26[hash-graph-integration] - 27[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-client] + 14[harpc-net] + 15[harpc-server] + 16[harpc-system] + 17[harpc-tower] + 18[hashql-ast] + 19[hashql-compiletest] + 20[hashql-core] + 21[hashql-eval] + 22[hashql-hir] + 23[hashql-mir] + 24[hashql-syntax-jexpr] + 25[hash-temporal-client] + 26[hash-graph-benches] + 27[hash-graph-integration] + 28[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 27 + 1 --> 9 + 1 -.-> 28 2 -.-> 3 - 4 --> 9 - 4 --> 14 - 4 --> 20 - 4 --> 23 + 4 --> 6 + 4 --> 10 + 4 --> 15 + 4 --> 21 + 4 --> 24 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 24 - 8 --> 2 - 9 --> 7 - 10 -.-> 27 - 11 -.-> 27 - 12 --> 15 - 13 --> 2 - 14 -.-> 1 - 14 -.-> 12 - 15 --> 16 - 16 
-.-> 13 - 16 --> 13 - 17 -.-> 18 - 18 --> 20 - 18 --> 23 - 19 --> 2 - 20 --> 6 - 20 --> 22 - 21 -.-> 18 - 22 --> 21 - 23 --> 17 - 23 --> 19 - 24 --> 1 - 25 -.-> 4 - 26 -.-> 6 - 27 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 25 + 9 --> 2 + 10 --> 8 + 11 -.-> 28 + 12 -.-> 28 + 13 --> 16 + 14 --> 2 + 15 -.-> 1 + 15 -.-> 13 + 16 --> 17 + 17 -.-> 14 + 17 --> 14 + 18 -.-> 19 + 19 --> 21 + 19 --> 24 + 20 --> 2 + 21 --> 7 + 21 --> 23 + 22 -.-> 19 + 23 --> 22 + 24 --> 18 + 24 --> 20 + 25 --> 1 + 26 -.-> 4 + 27 -.-> 7 + 28 --> 8 diff --git a/libs/@local/graph/api/Cargo.toml b/libs/@local/graph/api/Cargo.toml index 8099f3985e1..d2c2e6f268a 100644 --- a/libs/@local/graph/api/Cargo.toml +++ b/libs/@local/graph/api/Cargo.toml @@ -16,6 +16,7 @@ path = "src/bin/openapi-spec-generator.rs" harpc-client = { workspace = true, public = true } harpc-server = { workspace = true, public = true } hash-graph-authorization = { workspace = true, public = true } +hash-graph-embeddings = { workspace = true, public = true } hash-graph-postgres-store = { workspace = true, public = true, features = ["utoipa"] } hash-graph-type-fetcher = { workspace = true, public = true } hash-graph-types = { workspace = true, public = true, features = ["utoipa"] } diff --git a/libs/@local/graph/api/docs/dependency-diagram.mmd b/libs/@local/graph/api/docs/dependency-diagram.mmd index 57d0ebd98d4..3ec3bd7bc71 100644 --- a/libs/@local/graph/api/docs/dependency-diagram.mmd +++ b/libs/@local/graph/api/docs/dependency-diagram.mmd @@ -15,97 +15,100 @@ graph TD 4[hash-graph-api] class 4 root 5[hash-graph-authorization] - 6[hash-graph-migrations] - 7[hash-graph-migrations-macros] - 8[hash-graph-postgres-store] - 9[hash-graph-store] - 10[hash-graph-temporal-versioning] - 11[hash-graph-type-defs] - 12[hash-graph-type-fetcher] - 13[hash-graph-types] - 14[hash-graph-validation] - 15[harpc-client] - 16[harpc-codec] - 17[harpc-net] - 18[harpc-server] - 19[harpc-system] - 20[harpc-tower] - 21[harpc-types] - 
22[harpc-wire-protocol] - 23[hashql-ast] - 24[hashql-compiletest] - 25[hashql-core] - 26[hashql-diagnostics] - 27[hashql-eval] - 28[hashql-hir] - 29[hashql-macros] - 30[hashql-mir] - 31[hashql-syntax-jexpr] - 32[hash-status] - 33[hash-telemetry] - 34[hash-temporal-client] - 35[darwin-kperf] - 36[darwin-kperf-criterion] - 37[darwin-kperf-events] - 38[darwin-kperf-sys] - 39[error-stack] - 40[hash-graph-benches] - 41[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-migrations] + 8[hash-graph-migrations-macros] + 9[hash-graph-postgres-store] + 10[hash-graph-store] + 11[hash-graph-temporal-versioning] + 12[hash-graph-type-defs] + 13[hash-graph-type-fetcher] + 14[hash-graph-types] + 15[hash-graph-validation] + 16[harpc-client] + 17[harpc-codec] + 18[harpc-net] + 19[harpc-server] + 20[harpc-system] + 21[harpc-tower] + 22[harpc-types] + 23[harpc-wire-protocol] + 24[hashql-ast] + 25[hashql-compiletest] + 26[hashql-core] + 27[hashql-diagnostics] + 28[hashql-eval] + 29[hashql-hir] + 30[hashql-macros] + 31[hashql-mir] + 32[hashql-syntax-jexpr] + 33[hash-status] + 34[hash-telemetry] + 35[hash-temporal-client] + 36[darwin-kperf] + 37[darwin-kperf-criterion] + 38[darwin-kperf-events] + 39[darwin-kperf-sys] + 40[error-stack] + 41[hash-graph-benches] + 42[hash-graph-test-data] 0 --> 4 - 1 --> 10 - 1 -.-> 41 + 1 --> 11 + 1 -.-> 42 2 -.-> 3 - 2 --> 22 - 4 --> 11 + 2 --> 23 + 4 --> 6 4 --> 12 - 4 --> 18 - 4 --> 27 - 4 --> 31 + 4 --> 13 + 4 --> 19 + 4 --> 28 + 4 --> 32 5 --> 1 - 6 --> 7 - 6 --> 33 - 8 -.-> 6 - 8 --> 14 - 8 --> 32 - 9 --> 5 - 9 --> 13 - 9 --> 34 - 10 --> 2 - 11 --> 32 - 12 --> 9 - 13 -.-> 41 - 14 -.-> 41 - 15 --> 19 - 16 --> 21 - 16 --> 39 - 17 --> 2 - 17 -.-> 16 - 17 --> 16 - 18 -.-> 1 - 18 -.-> 15 - 19 --> 20 - 20 -.-> 17 - 20 --> 17 - 22 -.-> 21 - 22 --> 21 - 22 --> 39 - 23 -.-> 24 - 24 --> 27 - 24 --> 31 - 25 --> 2 - 25 --> 26 - 25 --> 29 - 25 -.-> 36 - 27 --> 8 - 27 --> 30 - 28 -.-> 24 - 30 --> 28 - 31 --> 23 - 31 --> 25 - 33 --> 39 - 34 --> 1 - 35 
--> 37 - 35 --> 38 - 36 --> 35 - 40 -.-> 4 - 41 --> 9 + 6 --> 14 + 7 --> 8 + 7 --> 34 + 9 -.-> 7 + 9 --> 15 + 9 --> 33 + 10 --> 5 + 10 --> 14 + 10 --> 35 + 11 --> 2 + 12 --> 33 + 13 --> 10 + 14 -.-> 42 + 15 -.-> 42 + 16 --> 20 + 17 --> 22 + 17 --> 40 + 18 --> 2 + 18 -.-> 17 + 18 --> 17 + 19 -.-> 1 + 19 -.-> 16 + 20 --> 21 + 21 -.-> 18 + 21 --> 18 + 23 -.-> 22 + 23 --> 22 + 23 --> 40 + 24 -.-> 25 + 25 --> 28 + 25 --> 32 + 26 --> 2 + 26 --> 27 + 26 --> 30 + 26 -.-> 37 + 28 --> 9 + 28 --> 31 + 29 -.-> 25 + 31 --> 29 + 32 --> 24 + 32 --> 26 + 34 --> 40 + 35 --> 1 + 36 --> 38 + 36 --> 39 + 37 --> 36 + 41 -.-> 4 + 42 --> 10 diff --git a/libs/@local/graph/api/openapi/openapi.json b/libs/@local/graph/api/openapi/openapi.json index 6fa9755b618..10199a45c71 100644 --- a/libs/@local/graph/api/openapi/openapi.json +++ b/libs/@local/graph/api/openapi/openapi.json @@ -8797,14 +8797,17 @@ }, "SearchEntitiesRequest": { "type": "object", - "description": "Request body for the entity embedding search endpoint.", + "description": "Request body for the entity embedding search endpoint.\n\nExactly one of `embedding` or `semanticString` must be provided. 
`semanticString` is converted\ninto an embedding by the server, which requires an embedding client to be configured.", "required": [ - "embedding", "maximumSemanticDistance" ], "properties": { "embedding": { - "$ref": "#/components/schemas/Embedding" + "allOf": [ + { + "$ref": "#/components/schemas/Embedding" + } + ] }, "filter": { "$ref": "#/components/schemas/SearchEntitiesFilter" @@ -8820,6 +8823,9 @@ "maximumSemanticDistance": { "type": "number", "format": "double" + }, + "semanticString": { + "type": "string" } }, "additionalProperties": false @@ -8847,14 +8853,17 @@ }, "SearchEntityTypesRequest": { "type": "object", - "description": "Request body for the entity type embedding search endpoint.", + "description": "Request body for the entity type embedding search endpoint.\n\nExactly one of `embedding` or `semanticString` must be provided. `semanticString` is converted\ninto an embedding by the server, which requires an embedding client to be configured.", "required": [ - "embedding", "maximumSemanticDistance" ], "properties": { "embedding": { - "$ref": "#/components/schemas/Embedding" + "allOf": [ + { + "$ref": "#/components/schemas/Embedding" + } + ] }, "limit": { "type": "integer", @@ -8864,6 +8873,9 @@ "maximumSemanticDistance": { "type": "number", "format": "double" + }, + "semanticString": { + "type": "string" } }, "additionalProperties": false diff --git a/libs/@local/graph/api/package.json b/libs/@local/graph/api/package.json index aada61f4d33..78104e1b39e 100644 --- a/libs/@local/graph/api/package.json +++ b/libs/@local/graph/api/package.json @@ -22,6 +22,7 @@ "@rust/harpc-types": "workspace:*", "@rust/hash-codec": "workspace:*", "@rust/hash-graph-authorization": "workspace:*", + "@rust/hash-graph-embeddings": "workspace:*", "@rust/hash-graph-postgres-store": "workspace:*", "@rust/hash-graph-store": "workspace:*", "@rust/hash-graph-temporal-versioning": "workspace:*", diff --git a/libs/@local/graph/api/src/rest/entity.rs 
b/libs/@local/graph/api/src/rest/entity.rs index dcc4846f143..244cba77669 100644 --- a/libs/@local/graph/api/src/rest/entity.rs +++ b/libs/@local/graph/api/src/rest/entity.rs @@ -6,6 +6,7 @@ use std::collections::HashMap; use axum::{Extension, Router, routing::post}; use error_stack::{Report, ResultExt as _}; use hash_graph_authorization::policies::principal::actor::AuthenticatedActor; +use hash_graph_embeddings::OpenAiEmbeddingClient; use hash_graph_postgres_store::store::error::{EntityDoesNotExist, RaceConditionOnUpdate}; use hash_graph_store::{ self, @@ -511,20 +512,21 @@ async fn search_entities( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + embedding_client: Extension>>, Extension(api_config): Extension, Json(request): Json, ) -> Result, BoxedResponse> where S: StorePool + Send + Sync, { - let store = store_pool - .acquire(temporal_client.0) + let params = request + .into_params(api_config, embedding_client.0.as_deref()) .await .map_err(report_to_response)?; - let params = request - .into_params(api_config) - .attach(hash_status::StatusCode::InvalidArgument) + let store = store_pool + .acquire(temporal_client.0) + .await .map_err(report_to_response)?; store diff --git a/libs/@local/graph/api/src/rest/entity_query_request.rs b/libs/@local/graph/api/src/rest/entity_query_request.rs index 1d5d0fa962b..47a0a59ba1a 100644 --- a/libs/@local/graph/api/src/rest/entity_query_request.rs +++ b/libs/@local/graph/api/src/rest/entity_query_request.rs @@ -22,6 +22,7 @@ use axum::{ response::{Html, IntoResponse as _}, }; use error_stack::{Report, ResultExt as _}; +use hash_graph_embeddings::OpenAiEmbeddingClient; use hash_graph_store::{ entity::{ EntityQueryCursor, EntityQueryPath, EntityQuerySorting, EntityQuerySortingRecord, @@ -68,7 +69,8 @@ use type_system::knowledge::Entity; use utoipa::ToSchema; use super::{ - ApiConfig, LimitExceededError, SearchRequestError, resolve_limit, status::BoxedResponse, + 
ApiConfig, LimitExceededError, SearchRequestError, resolve_limit, resolve_search_embedding, + status::BoxedResponse, }; #[tracing::instrument(level = "info", skip_all)] @@ -596,10 +598,18 @@ impl<'p> EntityQueryOptions<'_, 'p> { } /// Request body for the entity embedding search endpoint. +/// +/// Exactly one of `embedding` or `semanticString` must be provided. `semanticString` is converted +/// into an embedding by the server, which requires an embedding client to be configured. #[derive(Debug, Deserialize, ToSchema)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchEntitiesRequest { - pub embedding: Embedding<'static>, + #[serde(default)] + #[schema(nullable = false)] + pub embedding: Option>, + #[serde(default)] + #[schema(nullable = false)] + pub semantic_string: Option, pub maximum_semantic_distance: f64, pub limit: Option, #[serde(default)] @@ -609,25 +619,28 @@ pub struct SearchEntitiesRequest { } impl SearchEntitiesRequest { - /// # Errors + /// Converts the request into [`SearchEntitiesParams`], resolving the query embedding. /// - /// - [`InvalidSemanticDistance`] if the maximum semantic distance is invalid. - /// - [`LimitExceeded`] if the requested limit exceeds the configured maximum. + /// # Errors /// - /// [`InvalidSemanticDistance`]: [`SearchRequestError::InvalidSemanticDistance`] - /// [`LimitExceeded`]: [`SearchRequestError::LimitExceeded`] - pub fn into_params( + /// Returns a [`SearchRequestError`] if the query embedding cannot be resolved, the maximum + /// semantic distance is invalid, or the requested limit exceeds the configured maximum. 
+ pub async fn into_params( self, config: ApiConfig, + embedding_client: Option<&OpenAiEmbeddingClient>, ) -> Result> { + let embedding = + resolve_search_embedding(self.embedding, self.semantic_string, embedding_client) + .await?; Ok(SearchEntitiesParams { - embedding: self.embedding, + embedding, maximum_semantic_distance: SemanticDistance::try_from(self.maximum_semantic_distance) - .change_context( - SearchRequestError::InvalidSemanticDistance, - )?, + .change_context(SearchRequestError::InvalidSemanticDistance) + .attach(hash_status::StatusCode::InvalidArgument)?, limit: resolve_limit(self.limit, config.query_entity_limit) - .change_context(SearchRequestError::LimitExceeded)?, + .change_context(SearchRequestError::LimitExceeded) + .attach(hash_status::StatusCode::InvalidArgument)?, include_entity_types: self.include_entity_types, filter: self.filter, }) diff --git a/libs/@local/graph/api/src/rest/entity_type.rs b/libs/@local/graph/api/src/rest/entity_type.rs index 9d61258b70d..bcbc24ce6ff 100644 --- a/libs/@local/graph/api/src/rest/entity_type.rs +++ b/libs/@local/graph/api/src/rest/entity_type.rs @@ -9,6 +9,7 @@ use axum::{ }; use error_stack::{Report, ResultExt as _}; use hash_graph_authorization::policies::principal::actor::AuthenticatedActor; +use hash_graph_embeddings::OpenAiEmbeddingClient; use hash_graph_postgres_store::{ ontology::patch_id_and_parse, store::error::{BaseUrlAlreadyExists, OntologyVersionDoesNotExist, VersionedUrlAlreadyExists}, @@ -51,7 +52,7 @@ use crate::rest::{ ApiConfig, AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, SearchRequestError, json::Json, - resolve_limit, + resolve_limit, resolve_search_embedding, status::{report_to_response, status_to_response}, utoipa_typedef::{ListOrValue, MaybeListOfEntityType, subgraph::Subgraph}, }; @@ -541,38 +542,47 @@ where } /// Request body for the entity type embedding search endpoint. +/// +/// Exactly one of `embedding` or `semanticString` must be provided. 
`semanticString` is converted +/// into an embedding by the server, which requires an embedding client to be configured. #[derive(Debug, Deserialize, ToSchema)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchEntityTypesRequest { - pub embedding: Embedding<'static>, + #[serde(default)] + #[schema(nullable = false)] + pub embedding: Option>, + #[serde(default)] + #[schema(nullable = false)] + pub semantic_string: Option, pub maximum_semantic_distance: f64, pub limit: Option, } impl SearchEntityTypesRequest { /// Converts this request into the search parameters for the entity type embedding search - /// endpoint. + /// endpoint, resolving the query embedding. /// /// # Errors /// - /// - [`InvalidSemanticDistance`] if the maximum semantic distance is invalid. - /// - [`LimitExceeded`] if the requested limit exceeds the configured maximum in - /// [`ApiConfig::query_ontology_limit`]. - /// - /// [`InvalidSemanticDistance`]: SearchRequestError::InvalidSemanticDistance - /// [`LimitExceeded`]: SearchRequestError::LimitExceeded - pub fn into_params( + /// Returns a [`SearchRequestError`] if the query embedding cannot be resolved, the maximum + /// semantic distance is invalid, or the requested limit exceeds + /// [`ApiConfig::query_ontology_limit`]. 
+ pub async fn into_params( self, config: ApiConfig, + embedding_client: Option<&OpenAiEmbeddingClient>, ) -> Result> { + let embedding = + resolve_search_embedding(self.embedding, self.semantic_string, embedding_client) + .await?; Ok(SearchEntityTypesParams { - embedding: self.embedding, + embedding, maximum_semantic_distance: SemanticDistance::try_from(self.maximum_semantic_distance) - .change_context( - SearchRequestError::InvalidSemanticDistance, - )?, + .change_context(SearchRequestError::InvalidSemanticDistance) + .attach(hash_status::StatusCode::InvalidArgument)?, limit: resolve_limit(self.limit, config.query_ontology_limit) - .change_context(SearchRequestError::LimitExceeded)?, + .change_context(SearchRequestError::LimitExceeded) + .attach(hash_status::StatusCode::InvalidArgument)?, }) } } @@ -600,6 +610,7 @@ async fn search_entity_types( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + embedding_client: Extension>>, Extension(api_config): Extension, Json(request): Json, ) -> Result, BoxedResponse> @@ -607,8 +618,8 @@ where S: StorePool + Send + Sync, { let params = request - .into_params(api_config) - .attach(hash_status::StatusCode::InvalidArgument) + .into_params(api_config, embedding_client.0.as_deref()) + .await .map_err(report_to_response)?; let store = store_pool diff --git a/libs/@local/graph/api/src/rest/mod.rs b/libs/@local/graph/api/src/rest/mod.rs index 8e3167fde0e..28265d71b7e 100644 --- a/libs/@local/graph/api/src/rest/mod.rs +++ b/libs/@local/graph/api/src/rest/mod.rs @@ -38,6 +38,7 @@ use error_stack::{Report, ResultExt as _}; use futures::{SinkExt as _, channel::mpsc::Sender}; use hash_codec::numeric::Real; use hash_graph_authorization::policies::store::{PolicyStore, PrincipalStore}; +use hash_graph_embeddings::{EmbeddingError, EmbeddingGenerator as _, OpenAiEmbeddingClient}; use hash_graph_postgres_store::store::error::VersionedUrlAlreadyExists; use hash_graph_store::{ 
account::AccountStore, @@ -67,6 +68,7 @@ use hash_graph_temporal_versioning::{ OpenTemporalBound, RightBoundedTemporalInterval, TemporalBound, Timestamp, TransactionTime, }; use hash_graph_type_fetcher::TypeFetcher; +use hash_graph_types::Embedding; use hash_status::Status; use hash_temporal_client::TemporalClient; use include_dir::{Dir, include_dir}; @@ -369,14 +371,111 @@ pub(crate) fn resolve_limit( /// A search request could not be converted into store parameters. #[derive(Debug, Copy, Clone, PartialEq, Eq, derive_more::Display)] pub enum SearchRequestError { + /// The requested `limit` exceeds the configured maximum. #[display("The requested limit is invalid.")] LimitExceeded, + /// The requested maximum semantic distance is outside the valid range. #[display("The requested maximum semantic distance is invalid.")] InvalidSemanticDistance, + /// Neither `embedding` nor `semanticString` was provided. + #[display("Neither an embedding nor a semantic string was provided.")] + MissingEmbeddingSource, + /// Both `embedding` and `semanticString` were provided. + #[display("Both an embedding and a semantic string were provided.")] + ConflictingEmbeddingSource, + /// The provided `embedding` does not have the expected number of dimensions. + #[display("The provided embedding has an invalid number of dimensions.")] + InvalidEmbeddingDimensions, + /// `semanticString` was provided but the server has no embedding client configured. + #[display("Semantic-string search is unavailable because no embedding client is configured.")] + EmbeddingClientUnavailable, + /// The embedding for the provided `semanticString` could not be generated. + #[display("The embedding for the semantic string could not be generated.")] + EmbeddingGenerationFailed, } impl Error for SearchRequestError {} +/// Resolves the query embedding for a search request. +/// +/// Exactly one of `embedding` or `semantic_string` must be provided. 
When `semantic_string` is +/// given, it is converted into an embedding using `embedding_client`, which must be configured for +/// the request to succeed. +/// +/// # Errors +/// +/// - [`MissingEmbeddingSource`] if neither `embedding` nor `semantic_string` is provided. +/// - [`ConflictingEmbeddingSource`] if both are provided. +/// - [`InvalidEmbeddingDimensions`] if a provided `embedding` has the wrong number of dimensions. +/// - [`EmbeddingClientUnavailable`] if `semantic_string` is provided but no embedding client is +/// configured. +/// - [`EmbeddingGenerationFailed`] if the embedding client fails to generate an embedding. +/// +/// [`MissingEmbeddingSource`]: SearchRequestError::MissingEmbeddingSource +/// [`ConflictingEmbeddingSource`]: SearchRequestError::ConflictingEmbeddingSource +/// [`InvalidEmbeddingDimensions`]: SearchRequestError::InvalidEmbeddingDimensions +/// [`EmbeddingClientUnavailable`]: SearchRequestError::EmbeddingClientUnavailable +/// [`EmbeddingGenerationFailed`]: SearchRequestError::EmbeddingGenerationFailed +pub(crate) async fn resolve_search_embedding( + embedding: Option>, + semantic_string: Option, + embedding_client: Option<&OpenAiEmbeddingClient>, +) -> Result, Report> { + match (embedding, semantic_string) { + (Some(embedding), None) => { + // Validate a caller-supplied embedding here: unlike the `semantic_string` path (where + // the client guarantees the dimensionality), a precomputed embedding would otherwise + // flow unchecked into the pgvector cosine-distance query and fail deep in the store. 
+ if embedding.len() == Embedding::DIM { + Ok(embedding) + } else { + Err(Report::new(SearchRequestError::InvalidEmbeddingDimensions)) + .attach(hash_status::StatusCode::InvalidArgument) + } + } + (None, Some(semantic_string)) => { + let client = embedding_client + .ok_or_else(|| Report::new(SearchRequestError::EmbeddingClientUnavailable)) + .attach(hash_status::StatusCode::Unavailable)?; + client + .create_embeddings(&[semantic_string.as_str()]) + .await + .map_err(|report| { + let status = embedding_error_status(report.current_context()); + report + .change_context(SearchRequestError::EmbeddingGenerationFailed) + .attach(status) + })? + .into_iter() + .next() + .ok_or_else(|| Report::new(SearchRequestError::EmbeddingGenerationFailed)) + .attach(hash_status::StatusCode::Internal) + } + (Some(_), Some(_)) => Err(Report::new(SearchRequestError::ConflictingEmbeddingSource)) + .attach(hash_status::StatusCode::InvalidArgument), + (None, None) => Err(Report::new(SearchRequestError::MissingEmbeddingSource)) + .attach(hash_status::StatusCode::InvalidArgument), + } +} + +/// Maps an [`EmbeddingError`] to the HTTP status the search endpoints should report, so that +/// rate-limits and transient upstream outages are not flattened into an opaque `500`. +const fn embedding_error_status(error: &EmbeddingError) -> hash_status::StatusCode { + match error { + // Server-side configuration or provider-contract problems the caller cannot act on. + EmbeddingError::Unauthorized + | EmbeddingError::Response + | EmbeddingError::UnexpectedCount + | EmbeddingError::UnexpectedDimensions => hash_status::StatusCode::Internal, + // Rate limits are transient and the caller should back off. + EmbeddingError::RateLimited => hash_status::StatusCode::ResourceExhausted, + // A transport failure or upstream outage is transient and retryable. 
+ EmbeddingError::Request | EmbeddingError::ProviderUnavailable => { + hash_status::StatusCode::Unavailable + } + } +} + /// Server-side configuration for the REST API, shared across handlers via an [`Extension`]. #[derive(Debug, Clone, Copy)] #[cfg_attr(feature = "clap", derive(clap::Parser))] @@ -408,6 +507,7 @@ where { pub store: Arc, pub temporal_client: Option>, + pub embedding_client: Option>, pub domain_regex: DomainValidator, pub query_logger: Option, pub api_config: ApiConfig, @@ -456,6 +556,7 @@ where .layer(http_tracing_layer::HttpTracingLayer) .layer(Extension(dependencies.store)) .layer(Extension(dependencies.temporal_client)) + .layer(Extension(dependencies.embedding_client)) .layer(Extension(dependencies.domain_regex)) .layer(Extension(dependencies.api_config)); diff --git a/libs/@local/graph/authorization/docs/dependency-diagram.mmd b/libs/@local/graph/authorization/docs/dependency-diagram.mmd index ce8b50f65b7..9b7424d94ae 100644 --- a/libs/@local/graph/authorization/docs/dependency-diagram.mmd +++ b/libs/@local/graph/authorization/docs/dependency-diagram.mmd @@ -15,58 +15,61 @@ graph TD 4[hash-graph-api] 5[hash-graph-authorization] class 5 root - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 
20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/libs/@local/graph/embeddings/Cargo.toml b/libs/@local/graph/embeddings/Cargo.toml new file mode 100644 index 00000000000..907e2eb5420 --- /dev/null +++ b/libs/@local/graph/embeddings/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "hash-graph-embeddings" +version.workspace = true +edition.workspace = true +license.workspace = true +publish.workspace = true +authors.workspace = true + +[dependencies] +# Public workspace dependencies +hash-graph-types = { workspace = true, public = true } + +# Public third-party dependencies + +# Private workspace dependencies +error-stack = { workspace = true } + +# Private third-party dependencies +derive_more = { workspace = true, features = ["display", "error"] } +reqwest = { workspace = true } +reqwest-middleware = { workspace = true, features = ["json"] } +reqwest-retry = { workspace = true } +reqwest-tracing = { workspace = true } +serde = { workspace = true, features = ["derive"] } +simple-mermaid = { workspace = true } +tracing = { workspace = true } + +[lints] +workspace = true diff --git 
a/libs/@local/graph/embeddings/LICENSE.md b/libs/@local/graph/embeddings/LICENSE.md new file mode 100644 index 00000000000..9a70d795493 --- /dev/null +++ b/libs/@local/graph/embeddings/LICENSE.md @@ -0,0 +1,606 @@ +# GNU Affero General Public License + +_Version 3, 19 November 2007_ +_Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_ + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + +## Preamble + +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +Developers that use our General Public Licenses protect your rights +with two steps: **(1)** assert copyright on the software, and **(2)** offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + +A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation.
However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + +The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + +An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + +The precise terms and conditions for copying, distribution and +modification follow. + +## TERMS AND CONDITIONS + +### 0. Definitions + +“This License” refers to version 3 of the GNU Affero General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a “modified version” of the +earlier work or a work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based +on the Program. 
+ +To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + +An interactive user interface displays “Appropriate Legal Notices” +to the extent that it includes a convenient and prominently visible +feature that **(1)** displays an appropriate copyright notice, and **(2)** +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +### 1. Source Code + +The “source code” for a work means the preferred form of the work +for making modifications to it. “Object code” means any non-source +form of a work. + +A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ +The “System Libraries” of an executable work include anything, other +than the work as a whole, that **(a)** is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and **(b)** serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +“Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + +The Corresponding Source for a work in source code form is that +same work. + +### 2. Basic Permissions + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + +### 3. Protecting Users' Legal Rights From Anti-Circumvention Law + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + +### 4. 
Conveying Verbatim Copies + +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +### 5. Conveying Modified Source Versions + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + +- **a)** The work must carry prominent notices stating that you modified + it, and giving a relevant date. +- **b)** The work must carry prominent notices stating that it is + released under this License and any conditions added under section 7. + This requirement modifies the requirement in section 4 to + “keep intact all notices”. +- **c)** You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. +- **d)** If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +“aggregate” if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +### 6. Conveying Non-Source Forms + +You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + +- **a)** Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. +- **b)** Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either **(1)** a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or **(2)** access to copy the + Corresponding Source from a network server at no charge. +- **c)** Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. +- **d)** Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. +- **e)** Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A “User Product” is either **(1)** a “consumer product”, which means any +tangible personal property which is normally used for personal, family, +or household purposes, or **(2)** anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, “normally used” refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +### 7. Additional Terms + +“Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + +- **a)** Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or +- **b)** Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or +- **c)** Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or +- **d)** Limiting the use for publicity purposes of names of licensors or + authors of the material; or +- **e)** Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or +- **f)** Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + +All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + +### 8. Termination + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated **(a)** +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and **(b)** permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +### 9. Acceptance Not Required for Having Copies + +You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +### 10. Automatic Licensing of Downstream Recipients + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +### 11. 
Patents + +A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To “grant” such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either **(1)** cause the Corresponding Source to be so +available, or **(2)** arrange to deprive yourself of the benefit of the +patent license for this particular work, or **(3)** arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. “Knowingly relying” means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is “discriminatory” if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license **(a)** in connection with copies of the covered work +conveyed by you (or copies made from those copies), or **(b)** primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +### 12. No Surrender of Others' Freedom + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + +### 13. 
Remote Network Interaction; Use with the GNU General Public License + +Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + +### 14. Revised Versions of this License + +The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License “or any later version” applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. 
If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + +If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +### 15. Disclaimer of Warranty + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +### 16. Limitation of Liability + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + +### 17. 
Interpretation of Sections 15 and 16 + +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. diff --git a/libs/@local/graph/embeddings/docs/dependency-diagram.mmd b/libs/@local/graph/embeddings/docs/dependency-diagram.mmd new file mode 100644 index 00000000000..1ebad383105 --- /dev/null +++ b/libs/@local/graph/embeddings/docs/dependency-diagram.mmd @@ -0,0 +1,47 @@ +graph TD + linkStyle default stroke-width:1.5px + classDef default stroke-width:1px + classDef root stroke-width:3px + classDef dev stroke-width:1px + classDef build stroke-width:1px + %% Legend + %% --> : Normal dependency + %% -.-> : Dev dependency + %% ---> : Build dependency + 0[hash-graph] + 1[type-system] + 2[hash-codec] + 3[hash-codegen] + 4[hash-graph-api] + 5[hash-graph-authorization] + 6[hash-graph-embeddings] + class 6 root + 7[hash-graph-store] + 8[hash-graph-temporal-versioning] + 9[hash-graph-types] + 10[harpc-types] + 11[harpc-wire-protocol] + 12[hash-temporal-client] + 13[error-stack] + 14[hash-graph-benches] + 15[hash-graph-test-data] + 0 --> 4 + 1 --> 8 + 1 -.-> 15 + 2 -.-> 3 + 2 --> 11 + 4 --> 6 + 4 --> 7 + 5 --> 1 + 6 --> 9 + 7 --> 5 + 7 --> 9 + 7 --> 12 + 8 --> 2 + 9 -.-> 15 + 11 -.-> 10 + 11 --> 10 + 11 --> 13 + 12 --> 1 + 14 -.-> 4 + 15 --> 7 diff --git a/libs/@local/graph/embeddings/package.json b/libs/@local/graph/embeddings/package.json new file mode 100644 index 00000000000..1ed4d31f77d --- /dev/null +++ b/libs/@local/graph/embeddings/package.json @@ -0,0 +1,15 @@ +{ + "name": "@rust/hash-graph-embeddings", + "version": "0.0.0-private", + "private": true, + "license": "AGPL-3", + "scripts": { + "doc:dependency-diagram": "cargo run -p hash-repo-chores -- 
dependency-diagram --output docs/dependency-diagram.mmd --root hash-graph-embeddings --root-deps-and-dependents --link-mode non-roots --include-dev-deps --include-build-deps --logging-console-level info", + "fix:clippy": "just clippy --fix", + "lint:clippy": "just clippy" + }, + "dependencies": { + "@rust/error-stack": "workspace:*", + "@rust/hash-graph-types": "workspace:*" + } +} diff --git a/libs/@local/graph/embeddings/src/error.rs b/libs/@local/graph/embeddings/src/error.rs new file mode 100644 index 00000000000..478289bba81 --- /dev/null +++ b/libs/@local/graph/embeddings/src/error.rs @@ -0,0 +1,27 @@ +use derive_more::{Display, Error}; + +/// An error that can occur while generating embeddings. +#[derive(Debug, Display, Error)] +pub enum EmbeddingError { + /// The request to the embedding provider could not be sent. + #[display("Could not send the embedding request to the provider")] + Request, + /// The provider rejected the configured credentials (HTTP 401/403). + #[display("The embedding provider rejected the configured API key")] + Unauthorized, + /// The provider rate-limited the request (HTTP 429). + #[display("The embedding provider rate-limited the request")] + RateLimited, + /// The provider is temporarily unavailable (HTTP 5xx). + #[display("The embedding provider is temporarily unavailable")] + ProviderUnavailable, + /// The provider responded with an unexpected error status or an unparseable body. + #[display("The embedding provider returned an error response")] + Response, + /// The provider returned a different number of embeddings than the number of inputs. + #[display("The embedding provider returned an unexpected number of embeddings")] + UnexpectedCount, + /// An embedding did not have the expected dimensionality. 
+ #[display("The embedding provider returned an embedding with an unexpected dimensionality")] + UnexpectedDimensions, +} diff --git a/libs/@local/graph/embeddings/src/lib.rs b/libs/@local/graph/embeddings/src/lib.rs new file mode 100644 index 00000000000..a5e819a016f --- /dev/null +++ b/libs/@local/graph/embeddings/src/lib.rs @@ -0,0 +1,42 @@ +//! # HASH Graph Embeddings +//! +//! Generation of text embeddings for semantic search in the HASH Graph. +//! +//! ## Workspace dependencies +#![cfg_attr(doc, doc = simple_mermaid::mermaid!("../docs/dependency-diagram.mmd"))] + +pub use self::{ + error::EmbeddingError, + openai::{OpenAiEmbeddingClient, OpenAiEmbeddingClientConfig}, +}; + +mod error; +mod openai; + +use error_stack::Report; +use hash_graph_types::Embedding; + +/// Generates embedding vectors for text inputs. +/// +/// Implementations call out to an embedding provider (e.g. OpenAI). The generated embeddings are +/// used for semantic similarity search against the embeddings stored for entities and entity +/// types, so an implementation must produce embeddings from the same model that generated those +/// stored embeddings. +pub trait EmbeddingGenerator { + /// Generates an embedding for each input, returned in the same order as `inputs`. + /// + /// # Errors + /// + /// - [`EmbeddingError::Request`] if the request to the provider could not be sent. + /// - [`EmbeddingError::Unauthorized`] if the provider rejected the configured credentials. + /// - [`EmbeddingError::RateLimited`] if the provider rate-limited the request. + /// - [`EmbeddingError::ProviderUnavailable`] if the provider is temporarily unavailable. + /// - [`EmbeddingError::Response`] if the provider returned an error or an unparseable response. + /// - [`EmbeddingError::UnexpectedCount`] if the number of returned embeddings does not match + /// the number of inputs. + /// - [`EmbeddingError::UnexpectedDimensions`] if an embedding has an unexpected dimensionality. 
+ fn create_embeddings( + &self, + inputs: &[&str], + ) -> impl Future>, Report>> + Send; +} diff --git a/libs/@local/graph/embeddings/src/openai.rs b/libs/@local/graph/embeddings/src/openai.rs new file mode 100644 index 00000000000..b2d3f1c61ca --- /dev/null +++ b/libs/@local/graph/embeddings/src/openai.rs @@ -0,0 +1,231 @@ +use core::{fmt, time::Duration}; + +use error_stack::{Report, ResultExt as _}; +use hash_graph_types::Embedding; +use reqwest::{Client, StatusCode}; +use reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; +use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; +use reqwest_tracing::TracingMiddleware; +use serde::{Deserialize, Serialize}; + +use crate::{EmbeddingError, EmbeddingGenerator}; + +/// An OpenAI text-embedding model. +#[derive(Debug, Clone, Copy, Serialize)] +enum EmbeddingModel { + #[serde(rename = "text-embedding-3-large")] + TextEmbedding3Large, +} + +/// The encoding of the embedding values returned by the provider. +#[derive(Debug, Clone, Copy, Serialize)] +enum EncodingFormat { + #[serde(rename = "float")] + Float, +} + +/// The OpenAI embedding model used to generate query embeddings. +/// +/// This **must** match the model used to generate the stored entity and entity-type embeddings in +/// `apps/hash-ai-worker-ts/src/activities/shared/embeddings.ts`. The two are compared via cosine +/// distance during search, so a mismatch produces meaningless results. Changing the model requires +/// re-generating all stored embeddings on both sides. +const EMBEDDING_MODEL: EmbeddingModel = EmbeddingModel::TextEmbedding3Large; + +/// The default base URL of the OpenAI API. +const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1"; + +/// Per-request timeout for the embedding endpoint. +/// +/// The embedding call sits in the synchronous search request path, so the timeout is kept short to +/// avoid blocking an interactive search on a struggling upstream. 
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(10); + +/// The maximum number of retries for transient failures (HTTP 408, 429, 5xx, connection errors). +const MAX_RETRIES: u32 = 2; + +/// The shortest and longest backoff interval between retries. +const MIN_RETRY_INTERVAL: Duration = Duration::from_millis(250); +const MAX_RETRY_INTERVAL: Duration = Duration::from_secs(2); + +/// Configuration for an [`OpenAiEmbeddingClient`]. +#[derive(Clone)] +pub struct OpenAiEmbeddingClientConfig { + /// The OpenAI API key used to authenticate requests. + pub api_key: String, + /// Overrides the OpenAI API base URL. Defaults to `https://api.openai.com/v1`. + pub base_url: Option, +} + +impl fmt::Debug for OpenAiEmbeddingClientConfig { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("OpenAiEmbeddingClientConfig") + .field("api_key", &"[redacted]") + .field("base_url", &self.base_url) + .finish() + } +} + +/// An [`EmbeddingGenerator`] backed by the OpenAI embeddings API. +#[derive(Clone)] +pub struct OpenAiEmbeddingClient { + client: ClientWithMiddleware, + api_key: String, + base_url: String, +} + +impl fmt::Debug for OpenAiEmbeddingClient { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("OpenAiEmbeddingClient") + .field("base_url", &self.base_url) + .finish_non_exhaustive() + } +} + +impl OpenAiEmbeddingClient { + /// Creates a new client from the given configuration. + /// + /// Requests are retried on transient failures with exponential backoff. The retry policy is + /// deliberately conservative because the client is used in the interactive search path: + /// `Retry-After` headers are intentionally not honored, so a rate-limited or struggling + /// provider fails fast (within the bounded backoff) rather than blocking the search on a + /// provider-suggested delay. + /// + /// # Errors + /// + /// Returns [`EmbeddingError::Request`] if the underlying HTTP client could not be built. 
+ pub fn new(config: OpenAiEmbeddingClientConfig) -> Result> { + let client = Client::builder() + .timeout(REQUEST_TIMEOUT) + .build() + .change_context(EmbeddingError::Request)?; + + let retry_policy = ExponentialBackoff::builder() + .retry_bounds(MIN_RETRY_INTERVAL, MAX_RETRY_INTERVAL) + .build_with_max_retries(MAX_RETRIES); + + let client = ClientBuilder::new(client) + .with(TracingMiddleware::default()) + .with(RetryTransientMiddleware::new_with_policy(retry_policy)) + .build(); + + // Normalize away any trailing slash so joining with `/embeddings` never yields a double + // slash, regardless of whether the configured base URL ends in `/`. + let base_url = config + .base_url + .as_deref() + .unwrap_or(DEFAULT_BASE_URL) + .trim_end_matches('/') + .to_owned(); + + Ok(Self { + client, + api_key: config.api_key, + base_url, + }) + } +} + +#[derive(Debug, Serialize)] +struct EmbeddingRequest<'a> { + model: EmbeddingModel, + input: &'a [&'a str], + encoding_format: EncodingFormat, +} + +#[derive(Debug, Deserialize)] +struct EmbeddingResponse { + data: Vec, +} + +#[derive(Debug, Deserialize)] +struct EmbeddingDatum { + index: usize, + embedding: Vec, +} + +impl EmbeddingGenerator for OpenAiEmbeddingClient { + async fn create_embeddings( + &self, + inputs: &[&str], + ) -> Result>, Report> { + let response = self + .client + .post(format!("{}/embeddings", self.base_url)) + .bearer_auth(&self.api_key) + .json(&EmbeddingRequest { + model: EMBEDDING_MODEL, + input: inputs, + encoding_format: EncodingFormat::Float, + }) + .send() + .await + .change_context(EmbeddingError::Request)?; + + let status = response.status(); + if !status.is_success() { + let body = response + .text() + .await + .unwrap_or_else(|error| format!("")); + // Classify the provider status so the API layer can surface an appropriate HTTP + // status (e.g. rate-limit vs. outage vs. misconfiguration) rather than a blanket 500. 
+ let context = if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN { + EmbeddingError::Unauthorized + } else if status == StatusCode::TOO_MANY_REQUESTS { + EmbeddingError::RateLimited + } else if status.is_server_error() || status == StatusCode::REQUEST_TIMEOUT { + EmbeddingError::ProviderUnavailable + } else { + EmbeddingError::Response + }; + // Log the provider response server-side rather than attaching it to the returned + // `Report`: the API layer serializes error reports back to clients, so the body (which + // can echo request details) must not ride along on the error. + tracing::error!(%status, %body, "OpenAI embeddings request failed"); + return Err(Report::new(context)); + } + + let mut response = response + .json::() + .await + .change_context(EmbeddingError::Response)?; + + if response.data.len() != inputs.len() { + return Err(Report::new(EmbeddingError::UnexpectedCount)).attach(format!( + "expected {} embeddings, got {}", + inputs.len(), + response.data.len() + )); + } + + // The OpenAI API returns embeddings in input order, but we sort defensively by `index` and + // verify the indices form a contiguous `0..n` range, so a malformed-but-count-correct + // response cannot silently pair an input with the wrong embedding. 
+ response.data.sort_unstable_by_key(|datum| datum.index); + if response + .data + .iter() + .enumerate() + .any(|(position, datum)| datum.index != position) + { + return Err(Report::new(EmbeddingError::Response)) + .attach("provider returned non-contiguous embedding indices"); + } + + response + .data + .into_iter() + .map(|datum| { + if datum.embedding.len() != Embedding::DIM { + return Err(Report::new(EmbeddingError::UnexpectedDimensions)).attach(format!( + "expected {} dimensions, got {}", + Embedding::DIM, + datum.embedding.len() + )); + } + Ok(Embedding::from(datum.embedding)) + }) + .collect() + } +} diff --git a/libs/@local/graph/sdk/typescript/src/embeddings.ts b/libs/@local/graph/sdk/typescript/src/embeddings.ts index 5982aef1b53..b149b0ee1b8 100644 --- a/libs/@local/graph/sdk/typescript/src/embeddings.ts +++ b/libs/@local/graph/sdk/typescript/src/embeddings.ts @@ -1,8 +1,3 @@ -import { v4 as uuidv4 } from "uuid"; - -import type { Embedding } from "@local/hash-graph-client"; -import type { Client } from "@temporalio/client"; - export type CreateEmbeddingsParams = { input: string[]; }; @@ -14,27 +9,3 @@ export type CreateEmbeddingsReturn = { total_tokens: number; }; }; - -// TODO(BE-622): Move to the graph -export const calculateEmbedding = async ( - semanticString: string, - temporalClient: Client, -): Promise => { - const { embeddings } = await temporalClient.workflow.execute< - (params: CreateEmbeddingsParams) => Promise - >("createEmbeddings", { - taskQueue: "ai", - args: [ - { - input: [semanticString], - }, - ], - workflowId: uuidv4(), - }); - - if (embeddings.length === 0) { - throw new Error("No embeddings returned"); - } - - return embeddings[0]!; -}; diff --git a/libs/@local/graph/sdk/typescript/src/entity-type.ts b/libs/@local/graph/sdk/typescript/src/entity-type.ts index 0fe8f6932c9..07f91e327fe 100644 --- a/libs/@local/graph/sdk/typescript/src/entity-type.ts +++ b/libs/@local/graph/sdk/typescript/src/entity-type.ts @@ -1,4 +1,3 @@ -import { 
calculateEmbedding } from "./embeddings.js"; import { deserializeGraphVertices, mapGraphApiSubgraphToSubgraph, @@ -44,7 +43,6 @@ import type { SearchEntityTypesResponse as SearchEntityTypesResponseGraphApi, } from "@local/hash-graph-client"; import type { ActionName } from "@rust/hash-graph-authorization/types"; -import type { Client as TemporalClient } from "@temporalio/client"; export const hasPermissionForEntityTypes = ( graphAPI: GraphApi, @@ -140,32 +138,16 @@ export type SearchEntityTypesResponse = Omit< export const searchEntityTypes = async ( context: { graphApi: GraphApi; - temporalClient?: TemporalClient; }, authentication: AuthenticationContext, params: SearchEntityTypesRequest, -): Promise => { - // TODO(BE-622): Move to the graph - if (params.semanticString !== undefined) { - if (!context.temporalClient) { - throw new Error("Cannot query semantic string without temporal client"); - } - const { semanticString, ...rest } = params; - const embedding = await calculateEmbedding( - semanticString, - context.temporalClient, - ); - // eslint-disable-next-line no-param-reassign - params = { ...rest, embedding }; - } - - return context.graphApi +): Promise => + context.graphApi .searchEntityTypes(authentication.actorId, params) .then(({ data: response }) => ({ ...response, entityTypes: mapGraphApiEntityTypesToEntityTypes(response.entityTypes), })); -}; export type QueryEntityTypeSubgraphParams = ExclusiveUnion< DistributiveReplaceProperties< diff --git a/libs/@local/graph/sdk/typescript/src/entity.ts b/libs/@local/graph/sdk/typescript/src/entity.ts index da922062764..ac1405e9109 100644 --- a/libs/@local/graph/sdk/typescript/src/entity.ts +++ b/libs/@local/graph/sdk/typescript/src/entity.ts @@ -7,7 +7,6 @@ import { } from "@blockprotocol/type-system"; import { typedEntries, typedKeys } from "@local/advanced-types/typed-entries"; -import { calculateEmbedding } from "./embeddings.js"; import { mapGraphApiClosedMultiEntityTypeMapToClosedMultiEntityTypeMap, 
mapGraphApiEntityTypeResolveDefinitionsToEntityTypeResolveDefinitions, @@ -108,7 +107,6 @@ import type { CreateEntityPolicyParams, EntityPermissions, } from "@rust/hash-graph-store/types"; -import type { Client as TemporalClient } from "@temporalio/client"; export type BrandedPropertyObject> = T & { @@ -1539,26 +1537,11 @@ export const queryEntities = async < export const searchEntities = async ( context: { graphApi: GraphApi; - temporalClient?: TemporalClient; }, authentication: AuthenticationContext, params: SearchEntitiesRequest, -): Promise => { - // TODO(BE-622): Move to the graph - if (params.semanticString !== undefined) { - if (!context.temporalClient) { - throw new Error("Cannot query semantic string without temporal client"); - } - const { semanticString, ...rest } = params; - const embedding = await calculateEmbedding( - semanticString, - context.temporalClient, - ); - // eslint-disable-next-line no-param-reassign - params = { ...rest, embedding }; - } - - return context.graphApi +): Promise => + context.graphApi .searchEntities(authentication.actorId, params) .then(({ data: response }) => ({ ...response, @@ -1569,7 +1552,6 @@ export const searchEntities = async ( ) : undefined, })); -}; export const summarizeEntities = async ( context: { diff --git a/libs/@local/graph/store/docs/dependency-diagram.mmd b/libs/@local/graph/store/docs/dependency-diagram.mmd index d0627e3dd8b..0ba92b32822 100644 --- a/libs/@local/graph/store/docs/dependency-diagram.mmd +++ b/libs/@local/graph/store/docs/dependency-diagram.mmd @@ -14,59 +14,62 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - class 7 root - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 
20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + class 8 root + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/libs/@local/graph/temporal-versioning/docs/dependency-diagram.mmd b/libs/@local/graph/temporal-versioning/docs/dependency-diagram.mmd index 146d1f192df..3df7e86b38a 100644 --- a/libs/@local/graph/temporal-versioning/docs/dependency-diagram.mmd +++ b/libs/@local/graph/temporal-versioning/docs/dependency-diagram.mmd @@ -14,59 +14,62 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - class 8 root - 9[hash-graph-type-fetcher] - 
10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + class 9 root + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/libs/@local/graph/types/docs/dependency-diagram.mmd b/libs/@local/graph/types/docs/dependency-diagram.mmd index 5ddabb57b44..64f359477cc 100644 --- a/libs/@local/graph/types/docs/dependency-diagram.mmd +++ b/libs/@local/graph/types/docs/dependency-diagram.mmd @@ -14,59 +14,62 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 
5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - class 10 root - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + class 11 root + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/libs/@local/graph/types/src/embedding.rs b/libs/@local/graph/types/src/embedding.rs index 96722ed4e3c..22c9da3b8e8 100644 --- a/libs/@local/graph/types/src/embedding.rs 
+++ b/libs/@local/graph/types/src/embedding.rs @@ -20,6 +20,18 @@ impl Embedding<'_> { self.0.iter().copied() } + /// Returns the number of dimensions in this embedding. + #[must_use] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if this embedding has no dimensions. + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + #[must_use] pub fn into_owned(self) -> Embedding<'static> { Embedding(Cow::Owned(self.0.into_owned())) diff --git a/libs/@local/harpc/types/docs/dependency-diagram.mmd b/libs/@local/harpc/types/docs/dependency-diagram.mmd index a33077303be..91c4ae06387 100644 --- a/libs/@local/harpc/types/docs/dependency-diagram.mmd +++ b/libs/@local/harpc/types/docs/dependency-diagram.mmd @@ -13,72 +13,75 @@ graph TD 2[hash-codec] 3[hash-graph-api] 4[hash-graph-authorization] - 5[hash-graph-postgres-store] - 6[hash-graph-store] - 7[hash-graph-temporal-versioning] - 8[hash-graph-type-fetcher] - 9[hash-graph-types] - 10[hash-graph-validation] - 11[harpc-client] - 12[harpc-codec] - 13[harpc-net] - 14[harpc-server] - 15[harpc-system] - 16[harpc-tower] - 17[harpc-types] - class 17 root - 18[harpc-wire-protocol] - 19[hashql-ast] - 20[hashql-compiletest] - 21[hashql-core] - 22[hashql-eval] - 23[hashql-hir] - 24[hashql-mir] - 25[hashql-syntax-jexpr] - 26[hash-temporal-client] - 27[hash-graph-benches] - 28[hash-graph-integration] - 29[hash-graph-test-data] + 5[hash-graph-embeddings] + 6[hash-graph-postgres-store] + 7[hash-graph-store] + 8[hash-graph-temporal-versioning] + 9[hash-graph-type-fetcher] + 10[hash-graph-types] + 11[hash-graph-validation] + 12[harpc-client] + 13[harpc-codec] + 14[harpc-net] + 15[harpc-server] + 16[harpc-system] + 17[harpc-tower] + 18[harpc-types] + class 18 root + 19[harpc-wire-protocol] + 20[hashql-ast] + 21[hashql-compiletest] + 22[hashql-core] + 23[hashql-eval] + 24[hashql-hir] + 25[hashql-mir] + 26[hashql-syntax-jexpr] + 27[hash-temporal-client] + 28[hash-graph-benches] + 
29[hash-graph-integration] + 30[hash-graph-test-data] 0 --> 3 - 1 --> 7 - 1 -.-> 29 - 2 --> 18 - 3 --> 8 - 3 --> 14 - 3 --> 22 - 3 --> 25 + 1 --> 8 + 1 -.-> 30 + 2 --> 19 + 3 --> 5 + 3 --> 9 + 3 --> 15 + 3 --> 23 + 3 --> 26 4 --> 1 5 --> 10 - 6 --> 4 - 6 --> 9 - 6 --> 26 - 7 --> 2 - 8 --> 6 - 9 -.-> 29 - 10 -.-> 29 - 11 --> 15 - 12 --> 17 - 13 --> 2 - 13 -.-> 12 - 13 --> 12 - 14 -.-> 1 - 14 -.-> 11 - 15 --> 16 - 16 -.-> 13 - 16 --> 13 - 18 -.-> 17 - 18 --> 17 - 19 -.-> 20 - 20 --> 22 - 20 --> 25 - 21 --> 2 - 22 --> 5 - 22 --> 24 - 23 -.-> 20 - 24 --> 23 - 25 --> 19 - 25 --> 21 - 26 --> 1 - 27 -.-> 3 - 28 -.-> 5 - 29 --> 6 + 6 --> 11 + 7 --> 4 + 7 --> 10 + 7 --> 27 + 8 --> 2 + 9 --> 7 + 10 -.-> 30 + 11 -.-> 30 + 12 --> 16 + 13 --> 18 + 14 --> 2 + 14 -.-> 13 + 14 --> 13 + 15 -.-> 1 + 15 -.-> 12 + 16 --> 17 + 17 -.-> 14 + 17 --> 14 + 19 -.-> 18 + 19 --> 18 + 20 -.-> 21 + 21 --> 23 + 21 --> 26 + 22 --> 2 + 23 --> 6 + 23 --> 25 + 24 -.-> 21 + 25 --> 24 + 26 --> 20 + 26 --> 22 + 27 --> 1 + 28 -.-> 3 + 29 -.-> 6 + 30 --> 7 diff --git a/libs/@local/harpc/wire-protocol/docs/dependency-diagram.mmd b/libs/@local/harpc/wire-protocol/docs/dependency-diagram.mmd index 4bb5a7ed3f0..e665aaf4f40 100644 --- a/libs/@local/harpc/wire-protocol/docs/dependency-diagram.mmd +++ b/libs/@local/harpc/wire-protocol/docs/dependency-diagram.mmd @@ -13,70 +13,73 @@ graph TD 2[hash-codec] 3[hash-graph-api] 4[hash-graph-authorization] - 5[hash-graph-postgres-store] - 6[hash-graph-store] - 7[hash-graph-temporal-versioning] - 8[hash-graph-type-fetcher] - 9[hash-graph-types] - 10[hash-graph-validation] - 11[harpc-client] - 12[harpc-net] - 13[harpc-server] - 14[harpc-system] - 15[harpc-tower] - 16[harpc-types] - 17[harpc-wire-protocol] - class 17 root - 18[hashql-ast] - 19[hashql-compiletest] - 20[hashql-core] - 21[hashql-eval] - 22[hashql-hir] - 23[hashql-mir] - 24[hashql-syntax-jexpr] - 25[hash-temporal-client] - 26[error-stack] - 27[hash-graph-benches] - 28[hash-graph-integration] - 
29[hash-graph-test-data] + 5[hash-graph-embeddings] + 6[hash-graph-postgres-store] + 7[hash-graph-store] + 8[hash-graph-temporal-versioning] + 9[hash-graph-type-fetcher] + 10[hash-graph-types] + 11[hash-graph-validation] + 12[harpc-client] + 13[harpc-net] + 14[harpc-server] + 15[harpc-system] + 16[harpc-tower] + 17[harpc-types] + 18[harpc-wire-protocol] + class 18 root + 19[hashql-ast] + 20[hashql-compiletest] + 21[hashql-core] + 22[hashql-eval] + 23[hashql-hir] + 24[hashql-mir] + 25[hashql-syntax-jexpr] + 26[hash-temporal-client] + 27[error-stack] + 28[hash-graph-benches] + 29[hash-graph-integration] + 30[hash-graph-test-data] 0 --> 3 - 1 --> 7 - 1 -.-> 29 - 2 --> 17 - 3 --> 8 - 3 --> 13 - 3 --> 21 - 3 --> 24 + 1 --> 8 + 1 -.-> 30 + 2 --> 18 + 3 --> 5 + 3 --> 9 + 3 --> 14 + 3 --> 22 + 3 --> 25 4 --> 1 5 --> 10 - 6 --> 4 - 6 --> 9 - 6 --> 25 - 7 --> 2 - 8 --> 6 - 9 -.-> 29 - 10 -.-> 29 - 11 --> 14 - 12 --> 2 - 13 -.-> 1 - 13 -.-> 11 - 14 --> 15 - 15 -.-> 12 - 15 --> 12 - 17 -.-> 16 - 17 --> 16 - 17 --> 26 - 18 -.-> 19 - 19 --> 21 - 19 --> 24 - 20 --> 2 - 21 --> 5 - 21 --> 23 - 22 -.-> 19 - 23 --> 22 - 24 --> 18 - 24 --> 20 - 25 --> 1 - 27 -.-> 3 - 28 -.-> 5 - 29 --> 6 + 6 --> 11 + 7 --> 4 + 7 --> 10 + 7 --> 26 + 8 --> 2 + 9 --> 7 + 10 -.-> 30 + 11 -.-> 30 + 12 --> 15 + 13 --> 2 + 14 -.-> 1 + 14 -.-> 12 + 15 --> 16 + 16 -.-> 13 + 16 --> 13 + 18 -.-> 17 + 18 --> 17 + 18 --> 27 + 19 -.-> 20 + 20 --> 22 + 20 --> 25 + 21 --> 2 + 22 --> 6 + 22 --> 24 + 23 -.-> 20 + 24 --> 23 + 25 --> 19 + 25 --> 21 + 26 --> 1 + 28 -.-> 3 + 29 -.-> 6 + 30 --> 7 diff --git a/libs/@local/temporal-client/docs/dependency-diagram.mmd b/libs/@local/temporal-client/docs/dependency-diagram.mmd index 2550d3da4f3..65bcb84bf66 100644 --- a/libs/@local/temporal-client/docs/dependency-diagram.mmd +++ b/libs/@local/temporal-client/docs/dependency-diagram.mmd @@ -14,59 +14,62 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 
8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - class 21 root - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + class 22 root + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/tests/graph/test-data/rust/docs/dependency-diagram.mmd b/tests/graph/test-data/rust/docs/dependency-diagram.mmd index 2ac8e9e7481..8418e7f6ed1 100644 --- a/tests/graph/test-data/rust/docs/dependency-diagram.mmd +++ 
b/tests/graph/test-data/rust/docs/dependency-diagram.mmd @@ -14,59 +14,62 @@ graph TD 3[hash-codegen] 4[hash-graph-api] 5[hash-graph-authorization] - 6[hash-graph-postgres-store] - 7[hash-graph-store] - 8[hash-graph-temporal-versioning] - 9[hash-graph-type-fetcher] - 10[hash-graph-types] - 11[hash-graph-validation] - 12[harpc-server] - 13[harpc-types] - 14[harpc-wire-protocol] - 15[hashql-ast] - 16[hashql-compiletest] - 17[hashql-eval] - 18[hashql-hir] - 19[hashql-mir] - 20[hashql-syntax-jexpr] - 21[hash-temporal-client] - 22[error-stack] - 23[hash-graph-benches] - 24[hash-graph-integration] - 25[hash-graph-test-data] - class 25 root + 6[hash-graph-embeddings] + 7[hash-graph-postgres-store] + 8[hash-graph-store] + 9[hash-graph-temporal-versioning] + 10[hash-graph-type-fetcher] + 11[hash-graph-types] + 12[hash-graph-validation] + 13[harpc-server] + 14[harpc-types] + 15[harpc-wire-protocol] + 16[hashql-ast] + 17[hashql-compiletest] + 18[hashql-eval] + 19[hashql-hir] + 20[hashql-mir] + 21[hashql-syntax-jexpr] + 22[hash-temporal-client] + 23[error-stack] + 24[hash-graph-benches] + 25[hash-graph-integration] + 26[hash-graph-test-data] + class 26 root 0 --> 4 - 1 --> 8 - 1 -.-> 25 + 1 --> 9 + 1 -.-> 26 2 -.-> 3 - 2 --> 14 - 4 --> 9 - 4 --> 12 - 4 --> 17 - 4 --> 20 + 2 --> 15 + 4 --> 6 + 4 --> 10 + 4 --> 13 + 4 --> 18 + 4 --> 21 5 --> 1 6 --> 11 - 7 --> 5 - 7 --> 10 - 7 --> 21 - 8 --> 2 - 9 --> 7 - 10 -.-> 25 - 11 -.-> 25 - 12 -.-> 1 - 12 --> 13 - 14 -.-> 13 - 14 --> 13 - 14 --> 22 - 15 -.-> 16 - 16 --> 17 - 16 --> 20 - 17 --> 6 - 17 --> 19 - 18 -.-> 16 - 19 --> 18 - 20 --> 15 - 21 --> 1 - 23 -.-> 4 - 24 -.-> 6 - 25 --> 7 + 7 --> 12 + 8 --> 5 + 8 --> 11 + 8 --> 22 + 9 --> 2 + 10 --> 8 + 11 -.-> 26 + 12 -.-> 26 + 13 -.-> 1 + 13 --> 14 + 15 -.-> 14 + 15 --> 14 + 15 --> 23 + 16 -.-> 17 + 17 --> 18 + 17 --> 21 + 18 --> 7 + 18 --> 20 + 19 -.-> 17 + 20 --> 19 + 21 --> 16 + 22 --> 1 + 24 -.-> 4 + 25 -.-> 7 + 26 --> 8 diff --git a/yarn.lock b/yarn.lock index 
0d7f6da54a1..1d9b8730fee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -735,6 +735,7 @@ __metadata: "@rust/hash-codec": "workspace:*" "@rust/hash-graph-api": "workspace:*" "@rust/hash-graph-authorization": "workspace:*" + "@rust/hash-graph-embeddings": "workspace:*" "@rust/hash-graph-postgres-store": "workspace:*" "@rust/hash-graph-store": "workspace:*" "@rust/hash-graph-type-fetcher": "workspace:*" @@ -13067,6 +13068,7 @@ __metadata: "@rust/harpc-types": "workspace:*" "@rust/hash-codec": "workspace:*" "@rust/hash-graph-authorization": "workspace:*" + "@rust/hash-graph-embeddings": "workspace:*" "@rust/hash-graph-postgres-store": "workspace:*" "@rust/hash-graph-store": "workspace:*" "@rust/hash-graph-temporal-versioning": "workspace:*" @@ -13117,6 +13119,15 @@ __metadata: languageName: unknown linkType: soft +"@rust/hash-graph-embeddings@workspace:*, @rust/hash-graph-embeddings@workspace:libs/@local/graph/embeddings": + version: 0.0.0-use.local + resolution: "@rust/hash-graph-embeddings@workspace:libs/@local/graph/embeddings" + dependencies: + "@rust/error-stack": "workspace:*" + "@rust/hash-graph-types": "workspace:*" + languageName: unknown + linkType: soft + "@rust/hash-graph-http-tests@workspace:tests/graph/http": version: 0.0.0-use.local resolution: "@rust/hash-graph-http-tests@workspace:tests/graph/http"