From 6a893dff56ea1386986b2130e5fe598041a4ab64 Mon Sep 17 00:00:00 2001 From: Gabe Pesco Date: Tue, 19 May 2026 13:51:30 -0400 Subject: [PATCH] Fix: create_external_models misclassifies cross-repo models as external (#5326) In a multi-repo setup, create_external_models_file partitioned models by config and processed each repo in isolation. This caused dependencies on models defined in another repo to be treated as missing, and therefore classified as external. Fix: add an optional all_models parameter to create_external_models_file. When provided (as self._models, which spans all loaded repos), a dependency is only classified as external if it is absent from the full set. The per-repo models argument still controls which external_models.yaml gets written. Closes #5326 Signed-off-by: Gabe Pesco --- sqlmesh/core/context.py | 1 + sqlmesh/core/schema_loader.py | 9 +++- tests/core/integration/test_multi_repo.py | 56 +++++++++++++++++++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index f1a7657704..2861adbeda 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -2495,6 +2495,7 @@ def create_external_models(self, strict: bool = False) -> None: gateway=external_models_gateway, max_workers=self.concurrent_tasks, strict=strict, + all_models=self._models, ) @python_api_analytics diff --git a/sqlmesh/core/schema_loader.py b/sqlmesh/core/schema_loader.py index 52ab807c78..3fed8c5c09 100644 --- a/sqlmesh/core/schema_loader.py +++ b/sqlmesh/core/schema_loader.py @@ -24,26 +24,31 @@ def create_external_models_file( gateway: t.Optional[str] = None, max_workers: int = 1, strict: bool = False, + all_models: t.Optional[t.Dict[str, Model]] = None, ) -> None: """Create or replace a YAML file with column and types of all columns in all external models. Args: path: The path to store the YAML file. - models: FQN to model + models: FQN to model for the current repo/config being processed. adapter: The engine adapter. state_reader: The state reader. dialect: The dialect to serialize the schema as. gateway: If the model should be associated with a specific gateway; the gateway key max_workers: The max concurrent workers to fetch columns. strict: If True, raise an error if the external model is missing in the database. + all_models: FQN to model across all loaded repos. When provided, a dependency is only + classified as external if it is absent from this full set. This prevents cross-repo + internal models from being misclassified as external in multi-repo setups. """ + known_models: t.Dict[str, Model] = all_models if all_models is not None else models external_model_fqns = set() for fqn, model in models.items(): if model.kind.is_external: external_model_fqns.add(fqn) for dep in model.depends_on: - if dep not in models: + if dep not in known_models: external_model_fqns.add(dep) # Make sure we don't convert internal models into external ones. diff --git a/tests/core/integration/test_multi_repo.py b/tests/core/integration/test_multi_repo.py index 4d72d137b3..0b38231664 100644 --- a/tests/core/integration/test_multi_repo.py +++ b/tests/core/integration/test_multi_repo.py @@ -21,6 +21,7 @@ ) from sqlmesh.core.console import get_console from sqlmesh.core.context import Context +from sqlmesh.utils import yaml from sqlmesh.utils.date import now from tests.conftest import DuckDBMetadata from tests.utils.test_helpers import use_terminal_console @@ -559,3 +560,58 @@ def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): gathered_gateways = context.engine_adapters.keys() expected_gateways = {"local", "memory", "extra"} assert gathered_gateways == expected_gateways + + +@use_terminal_console +def test_multi_repo_create_external_models(copy_to_temp_path): + """create_external_models should not classify cross-repo models as external (sqlmesh#5326). + + silver.c and silver.e (repo_2) depend on bronze.a (repo_1). When running + create_external_models with both repos loaded, bronze.a must NOT be treated + as an external model because it is an internal model defined in repo_1. + + The observable symptom of the bug is a warning: "Unable to get schema for + 'bronze.a'" — because SQLMesh tries to query the schema of what it wrongly + thinks is an external table. A correct implementation never attempts this + lookup and therefore emits no such warning. + """ + paths = copy_to_temp_path("examples/multi") + repo_1_path = f"{paths[0]}/repo_1" + repo_2_path = f"{paths[0]}/repo_2" + + context = Context(paths=[repo_1_path, repo_2_path], gateway="memory") + context._new_state_sync().reset(default_catalog=context.default_catalog) + + with patch.object(context.console, "log_warning") as mock_warning: + context.create_external_models() + + warning_messages = [str(call) for call in mock_warning.call_args_list] + schema_lookup_warnings = [ + msg + for msg in warning_messages + if "bronze" in msg and "a" in msg and "schema" in msg.lower() + ] + assert not schema_lookup_warnings, ( + "bronze.a should not be looked up as an external model, but got warnings: " + + str(schema_lookup_warnings) + ) + + # repo_2's external_models.yaml must not contain bronze.a + repo_2_external = Path(repo_2_path) / c.EXTERNAL_MODELS_YAML + if repo_2_external.exists(): + contents = yaml.load(repo_2_external) + external_names = [e["name"] for e in contents] + assert not any("bronze" in name and "a" in name for name in external_names), ( + f"bronze.a should not be in repo_2's external models, but found: {external_names}" + ) + + # repo_1 has no external dependencies at all + repo_1_external = Path(repo_1_path) / c.EXTERNAL_MODELS_YAML + if repo_1_external.exists(): + contents = yaml.load(repo_1_external) + assert len(contents) == 0, f"repo_1 should have no external models, got: {contents}" + + # Plan should still resolve all 5 models as internal after create_external_models + context.load() + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 5