diff --git a/.changes/unreleased/fixed-20260430-130558.yaml b/.changes/unreleased/fixed-20260430-130558.yaml new file mode 100644 index 000000000..16dd7fabb --- /dev/null +++ b/.changes/unreleased/fixed-20260430-130558.yaml @@ -0,0 +1,6 @@ +kind: fixed +body: Refactor `fab tables schema` to use the `deltalake` Python library for schema extraction via ABFSS URI instead of manually parsing Delta log commit files +time: 2026-04-30T13:05:58.364670843Z +custom: + Author: pkontek + AuthorLink: https://github.com/pkontek diff --git a/pyproject.toml b/pyproject.toml index f2c52dc5d..a78ce9d6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "requests", "cryptography", "fabric-cicd>=0.3.1", + "deltalake>=1.0.0,<2.0.0", ] [project.scripts] diff --git a/requirements-dev.txt b/requirements-dev.txt index 1b831e57e..ef44d1c0b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,6 +12,7 @@ psutil==7.0.0 requests cryptography fabric-cicd>=0.3.1 +deltalake>=1.0.0,<2.0.0 # Testing and Building Requirements tox>=4.20.0 diff --git a/src/fabric_cli/client/fab_delta_client.py b/src/fabric_cli/client/fab_delta_client.py new file mode 100644 index 000000000..b3fa17c97 --- /dev/null +++ b/src/fabric_cli/client/fab_delta_client.py @@ -0,0 +1,74 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import json +from argparse import Namespace + +from deltalake import DeltaTable +from deltalake.exceptions import DeltaError + +from fabric_cli.core import fab_constant +from fabric_cli.core.fab_auth import FabAuth +from fabric_cli.core.fab_exceptions import FabricCLIError +from fabric_cli.errors import ErrorMessages + +# Item types whose OneLake Tables/ folder contains standard Delta tables. +# SemanticModel is explicitly excluded: its Tables/ entries are columnar +# semantic-model representations, not Delta-compatible parquet. +_DELTA_SUPPORTED_ITEM_TYPES: frozenset[str] = frozenset( + { + "Lakehouse", + "Warehouse", + "KQLDatabase", + "MirroredDatabase", + "SQLDatabase", + } +) + + +def get_table_schema(args: Namespace, local_path: str) -> list[dict]: + """Return schema fields for a Delta table on OneLake. + + Single seam for DeltaTable construction: owns item-type validation, + URI building, storage_options assembly, token acquisition, and + exception mapping. + """ + item_type = getattr(args, "item_type", None) + if item_type is not None and item_type not in _DELTA_SUPPORTED_ITEM_TYPES: + raise FabricCLIError( + ErrorMessages.Table.unsupported_item_type_for_delta(item_type), + fab_constant.ERROR_INVALID_ITEM_TYPE, + ) + + token = FabAuth().get_access_token(fab_constant.SCOPE_ONELAKE_DEFAULT) + if token is None: + raise FabricCLIError( + ErrorMessages.Auth.access_token_failed(), + fab_constant.ERROR_AUTHENTICATION_FAILED, + ) + + table_uri = ( + f"abfss://{args.ws_id}@{fab_constant.API_ENDPOINT_ONELAKE}" + f"/{args.lakehouse_id}/{local_path}" + ) + + try: + table = DeltaTable( + table_uri, + storage_options={ + "bearer_token": token, + "use_fabric_endpoint": "true", + }, + ) + schema_dict = json.loads(table.schema().to_json()) + schema_fields = schema_dict.get("fields") + if not isinstance(schema_fields, list): + raise ValueError( + "Delta table schema JSON does not contain a valid 'fields' list." + ) + return schema_fields + except (DeltaError, json.JSONDecodeError, ValueError) as exc: + raise FabricCLIError( + "Failed to extract the table schema. Please ensure the path points to a valid Delta table.", + fab_constant.ERROR_INVALID_DELTA_TABLE, + ) from exc diff --git a/src/fabric_cli/commands/tables/fab_tables_schema.py b/src/fabric_cli/commands/tables/fab_tables_schema.py index 74f3b39dd..6f2132358 100644 --- a/src/fabric_cli/commands/tables/fab_tables_schema.py +++ b/src/fabric_cli/commands/tables/fab_tables_schema.py @@ -1,80 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import json from argparse import Namespace -from typing import Optional -from fabric_cli.client import fab_api_onelake as onelake_api -from fabric_cli.core import fab_constant -from fabric_cli.core import fab_handle_context as handle_context -from fabric_cli.core.fab_exceptions import FabricCLIError -from fabric_cli.core.hiearchy.fab_hiearchy import OneLakeItem +from fabric_cli.client import fab_delta_client as delta_client from fabric_cli.utils import fab_ui -from fabric_cli.utils import fab_util as utils def exec_command(args: Namespace) -> None: - schema = _extract_schema_from_commit_logs(args) - if schema: - fab_ui.print_grey("Schema extracted successfully") - _schema = json.loads(schema)["fields"] - fab_ui.print_output_format(args, data=_schema, show_headers=True) + schema_fields = _get_table_schema(args) + fab_ui.print_grey("Schema extracted successfully") + fab_ui.print_output_format(args, data=schema_fields, show_headers=True) - else: - raise FabricCLIError( - "Failed to extract the table schema. Please ensure the path points to a valid Delta table", - fab_constant.ERROR_INVALID_DETLA_TABLE, - ) - -def _get_commit_logs(args: Namespace) -> Optional[list[str]]: - _delta_log_path = args.path - _delta_log_path[-1] = _delta_log_path[-1] + "/_delta_log" - - _context = handle_context.get_command_context(_delta_log_path, raise_error=True) - assert isinstance(_context, OneLakeItem) - onelake: OneLakeItem = _context - workspace_id = onelake.workspace.id - item_id = onelake.item.id - local_path = onelake.local_path - - local_path = utils.remove_dot_suffix(local_path) - args.directory = f"{workspace_id}/?recursive=false&resource=filesystem&directory={item_id}/{local_path}&getShortcutMetadata=true" - response = onelake_api.list_tables_files_recursive(args) - - if response.status_code in {200, 201}: - file_names = [f["name"] for f in response.json().get("paths", [])] - json_files = [ - f"{workspace_id}/{item_id}/{f.split('/', 1)[1]}" - for f in file_names - if f.endswith(".json") and f != "_temporary" - ] - json_files.sort(reverse=True) - return json_files - return None - - -def _extract_schema_from_commit_logs(args: Namespace) -> Optional[str]: - commit_logs = _get_commit_logs(args) - - if not commit_logs: - return None - - for log in commit_logs: - args.from_path = log - args.wait = True - response = onelake_api.read(args) - - if response.status_code in {200, 201}: - json_string = response.text - json_objects = json_string.strip().split("\n") - - for obj in json_objects: - commit_data = json.loads(obj) - if "metaData" in commit_data: - metadata = commit_data["metaData"] - schema = metadata["schemaString"] - return schema - - return None +def _get_table_schema(args: Namespace) -> list[dict]: + return delta_client.get_table_schema(args, args.table_local_path) diff --git a/src/fabric_cli/core/fab_constant.py b/src/fabric_cli/core/fab_constant.py index 4fd00e7b9..424299aaa 100644 --- a/src/fabric_cli/core/fab_constant.py +++ b/src/fabric_cli/core/fab_constant.py @@ -19,8 +19,7 @@ ) API_ENDPOINT_POWER_BI = ( - validate_and_get_env_variable( - "FAB_API_ENDPOINT_POWER_BI", "api.powerbi.com") + validate_and_get_env_variable("FAB_API_ENDPOINT_POWER_BI", "api.powerbi.com") + "/v1.0/myorg" ) @@ -264,7 +263,7 @@ ERROR_INVALID_OPERATION = "InvalidOperation" ERROR_INVALID_PATH = "InvalidPath" ERROR_INVALID_PROPERTY = "InvalidProperty" -ERROR_INVALID_DETLA_TABLE = "InvalidDeltaTable" +ERROR_INVALID_DELTA_TABLE = "InvalidDeltaTable" ERROR_INVALID_QUERY_FIELDS = "InvalidQueryFields" ERROR_INVALID_WORKSPACE_TYPE = "InvalidWorkspaceType" ERROR_INVALID_QUERY = "InvalidQuery" @@ -351,4 +350,3 @@ # Invalid query parameters for set command across all fabric resources SET_COMMAND_INVALID_QUERIES = ["id", "type", "workspaceId", "folderId"] - diff --git a/src/fabric_cli/errors/table.py b/src/fabric_cli/errors/table.py index 46e4f8773..2f2b25f4c 100644 --- a/src/fabric_cli/errors/table.py +++ b/src/fabric_cli/errors/table.py @@ -13,4 +13,11 @@ def invalid_format_argument(part: str) -> str: @staticmethod def invalid_key(key: str, allowed_keys: str) -> str: - return f"Invalid key: '{key}'. Allowed keys are: {allowed_keys}" \ No newline at end of file + return f"Invalid key: '{key}'. Allowed keys are: {allowed_keys}" + + @staticmethod + def unsupported_item_type_for_delta(item_type: str) -> str: + return ( + f"'{item_type}' does not expose Delta tables under Tables/. " + "Delta table schema is supported for: Lakehouse, Warehouse, KQLDatabase, MirroredDatabase, SQLDatabase." + ) \ No newline at end of file diff --git a/src/fabric_cli/utils/fab_cmd_table_utils.py b/src/fabric_cli/utils/fab_cmd_table_utils.py index 3b00fb6af..d7fded41d 100644 --- a/src/fabric_cli/utils/fab_cmd_table_utils.py +++ b/src/fabric_cli/utils/fab_cmd_table_utils.py @@ -7,6 +7,7 @@ from fabric_cli.core.fab_exceptions import FabricCLIError from fabric_cli.core.hiearchy.fab_onelake_element import OneLakeItem from fabric_cli.errors import ErrorMessages +from fabric_cli.utils import fab_util as utils def add_table_props_to_args(args: Any, context: OneLakeItem) -> None: @@ -21,8 +22,9 @@ def add_table_props_to_args(args: Any, context: OneLakeItem) -> None: args.lakehouse_path = context.item.path table_path = context.local_path.split("/") - args.table_name = table_path[-1] + args.table_name = utils.remove_dot_suffix(table_path[-1]) args.schema = table_path[-2] if len(table_path) == 3 else None + args.table_local_path = utils.remove_dot_suffix(context.local_path) def convert_hours_to_dhhmmss(hours: int) -> str: diff --git a/tests/test_commands/commands_parser.py b/tests/test_commands/commands_parser.py index db66f60b1..34695af74 100644 --- a/tests/test_commands/commands_parser.py +++ b/tests/test_commands/commands_parser.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. import platform + from prompt_toolkit.input import DummyInput from prompt_toolkit.output import DummyOutput @@ -12,6 +13,9 @@ from fabric_cli.parsers.fab_config_parser import ( register_parser as register_config_parser, ) +from fabric_cli.parsers.fab_find_parser import ( + register_parser as register_find_parser, +) from fabric_cli.parsers.fab_fs_parser import ( register_assign_parser, register_cd_parser, @@ -32,13 +36,13 @@ register_stop_parser, register_unassign_parser, ) -from fabric_cli.parsers.fab_find_parser import ( - register_parser as register_find_parser, -) from fabric_cli.parsers.fab_jobs_parser import register_parser as register_jobs_parser from fabric_cli.parsers.fab_labels_parser import ( register_parser as register_labels_parser, ) +from fabric_cli.parsers.fab_tables_parser import ( + register_parser as register_tables_parser, +) parserHandlers = [ register_labels_parser, @@ -65,6 +69,7 @@ register_rm_parser, register_mkdir_parser, register_jobs_parser, + register_tables_parser, ] diff --git a/tests/test_commands/recordings/test_commands/test_tables_schema/class_setup.yaml b/tests/test_commands/recordings/test_commands/test_tables_schema/class_setup.yaml new file mode 100644 index 000000000..a6d2e8bd5 --- /dev/null +++ b/tests/test_commands/recordings/test_commands/test_tables_schema/class_setup.yaml @@ -0,0 +1,180 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: GET + uri: https://api.fabric.microsoft.com/v1/workspaces + response: + body: + string: '{"value": [{"id": "94da8ea5-0bd6-4a9e-b717-5fdb482f4c71", "displayName": + "My workspace", "description": "", "type": "Personal"}]}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: GET + uri: https://api.fabric.microsoft.com/v1/workspaces + response: + body: + string: '{"value": [{"id": "94da8ea5-0bd6-4a9e-b717-5fdb482f4c71", "displayName": + "My workspace", "description": "", "type": "Personal"}]}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: GET + uri: https://api.fabric.microsoft.com/v1/capacities + response: + body: + string: '{"value": [{"id": "00000000-0000-0000-0000-000000000004", "displayName": + "mocked_fabriccli_capacity_name", "sku": "F2", "region": "West Europe", "state": + "Active"}]}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +- request: + body: '{"displayName": "fabriccli_WorkspacePerTestclass_000001", "capacityId": "00000000-0000-0000-0000-000000000004"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: POST + uri: https://api.fabric.microsoft.com/v1/workspaces + response: + body: + string: '{"id": "d5e6f7a8-b9c0-d1e2-f3a4-b5c6d7e8f9a0", "displayName": "fabriccli_WorkspacePerTestclass_000001", + "type": "Workspace", "capacityId": "00000000-0000-0000-0000-000000000004"}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 201 + message: Created +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: GET + uri: https://api.fabric.microsoft.com/v1/workspaces + response: + body: + string: '{"value": [{"id": "94da8ea5-0bd6-4a9e-b717-5fdb482f4c71", "displayName": + "My workspace", "description": "", "type": "Personal"}, {"id": "d5e6f7a8-b9c0-d1e2-f3a4-b5c6d7e8f9a0", + "displayName": "fabriccli_WorkspacePerTestclass_000001", "type": "Workspace", "capacityId": + "00000000-0000-0000-0000-000000000004"}]}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: GET + uri: https://api.fabric.microsoft.com/v1/workspaces/d5e6f7a8-b9c0-d1e2-f3a4-b5c6d7e8f9a0/items + response: + body: + string: '{"value": []}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + Content-Type: + - application/json + User-Agent: + - ms-fabric-cli-test/1.0.0 + method: DELETE + uri: https://api.fabric.microsoft.com/v1/workspaces/d5e6f7a8-b9c0-d1e2-f3a4-b5c6d7e8f9a0 + response: + body: + string: '' + headers: + Content-Type: + - application/octet-stream + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_commands/test_tables_schema.py b/tests/test_commands/test_tables_schema.py new file mode 100644 index 000000000..f514451e1 --- /dev/null +++ b/tests/test_commands/test_tables_schema.py @@ -0,0 +1,34 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from unittest.mock import MagicMock, patch + +from tests.test_commands.commands_parser import CLIExecutor + +_SCHEMA_COMMAND = "fabric_cli.commands.tables.fab_tables.schema_command" + + +class TestTablesSchemaIntegration: + """Dispatch test: verifies the parser routes 'table schema ' to schema_command.""" + + def test_table_schema_dispatches_to_schema_command(self, cli_executor: CLIExecutor): + with patch(_SCHEMA_COMMAND) as mock_cmd: + cli_executor.exec_command( + "table schema /ws.Workspace/lh.Lakehouse/Tables/my_table" + ) + + mock_cmd.assert_called_once() + args = mock_cmd.call_args[0][0] + assert args.path == ["/ws.Workspace/lh.Lakehouse/Tables/my_table"] + + def test_table_schema_dispatches_with_schema_namespace( + self, cli_executor: CLIExecutor + ): + with patch(_SCHEMA_COMMAND) as mock_cmd: + cli_executor.exec_command( + "table schema /ws.Workspace/lh.Lakehouse/Tables/dbo/my_table" + ) + + mock_cmd.assert_called_once() + args = mock_cmd.call_args[0][0] + assert args.path == ["/ws.Workspace/lh.Lakehouse/Tables/dbo/my_table"] diff --git a/tests/test_core/test_delta_client.py b/tests/test_core/test_delta_client.py new file mode 100644 index 000000000..bc50aaba3 --- /dev/null +++ b/tests/test_core/test_delta_client.py @@ -0,0 +1,430 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import json +from argparse import Namespace +from decimal import Decimal +from unittest.mock import MagicMock, patch + +import pyarrow as pa +import pytest +from deltalake import DeltaTable, write_deltalake +from deltalake.exceptions import DeltaError, TableNotFoundError + +from fabric_cli.commands.tables import fab_tables_schema +from fabric_cli.core import fab_constant +from fabric_cli.core.fab_exceptions import FabricCLIError + +_DELTA_CLIENT = "fabric_cli.client.fab_delta_client" + + +class TestDeltaClientSchemaUnit: + """Unit tests for fab_delta_client schema extraction — no network, no VCR.""" + + @pytest.fixture + def mock_auth(self): + with patch(f"{_DELTA_CLIENT}.FabAuth") as mock: + instance = MagicMock() + instance.get_access_token.return_value = "mock_token" + mock.return_value = instance + yield mock + + @pytest.fixture + def mock_delta_table(self): + with patch(f"{_DELTA_CLIENT}.DeltaTable") as mock: + yield mock + + def _make_delta_table_mock(self, mock_delta_table, schema_json): + mock_arrow_schema = MagicMock() + mock_arrow_schema.to_json.return_value = schema_json + mock_table_instance = MagicMock() + mock_table_instance.schema.return_value = mock_arrow_schema + mock_delta_table.return_value = mock_table_instance + + def test_auth_token_none_raises_authentication_error(self): + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + with patch(f"{_DELTA_CLIENT}.FabAuth") as mock_auth: + mock_auth.return_value.get_access_token.return_value = None + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + assert exc_info.value.status_code == fab_constant.ERROR_AUTHENTICATION_FAILED + + def test_get_table_schema_success(self, mock_auth, mock_delta_table): + args = Namespace( + ws_id="test-ws-id", + lakehouse_id="test-lakehouse-id", + table_local_path="Tables/test_table", + ) + mock_schema = { + "fields": [ + {"name": "id", "type": "integer", "nullable": False, "metadata": {}}, + {"name": "name", "type": "string", "nullable": True, "metadata": {}}, + ] + } + self._make_delta_table_mock(mock_delta_table, json.dumps(mock_schema)) + + result = fab_tables_schema._get_table_schema(args) + + assert len(result) == 2 + assert result[0] == { + "name": "id", + "type": "integer", + "nullable": False, + "metadata": {}, + } + assert result[1] == { + "name": "name", + "type": "string", + "nullable": True, + "metadata": {}, + } + + call_args = mock_delta_table.call_args + assert "test-lakehouse-id" in call_args[0][0] + assert "Tables/test_table" in call_args[0][0] + assert call_args[1]["storage_options"]["bearer_token"] == "mock_token" + assert call_args[1]["storage_options"]["use_fabric_endpoint"] == "true" + + def test_get_table_schema_with_schema_namespace(self, mock_auth, mock_delta_table): + """Schema-qualified path (e.g. Tables/dbo/table) passes the full path to DeltaTable.""" + args = Namespace( + ws_id="test-ws-id", + lakehouse_id="test-lakehouse-id", + table_local_path="Tables/dbo/test_table", + ) + self._make_delta_table_mock( + mock_delta_table, + json.dumps( + { + "fields": [ + { + "name": "col1", + "type": "long", + "nullable": True, + "metadata": {}, + } + ] + } + ), + ) + + result = fab_tables_schema._get_table_schema(args) + + assert mock_delta_table.call_args[0][0].endswith("Tables/dbo/test_table") + assert result[0]["name"] == "col1" + + def test_abfss_uri_format(self, mock_auth, mock_delta_table): + """DeltaTable must be called with a well-formed abfss:// URI.""" + args = Namespace( + ws_id="workspace-guid-123", + lakehouse_id="lakehouse-guid-456", + table_local_path="Tables/my_table", + ) + self._make_delta_table_mock( + mock_delta_table, + json.dumps( + { + "fields": [ + { + "name": "c", + "type": "string", + "nullable": True, + "metadata": {}, + } + ] + } + ), + ) + + fab_tables_schema._get_table_schema(args) + + uri = mock_delta_table.call_args[0][0] + assert uri.startswith("abfss://workspace-guid-123@") + assert "lakehouse-guid-456" in uri + assert "Tables/my_table" in uri + opts = mock_delta_table.call_args[1]["storage_options"] + assert opts["bearer_token"] == "mock_token" + assert opts["use_fabric_endpoint"] == "true" + + @pytest.mark.parametrize("error_cls", [TableNotFoundError, DeltaError]) + def test_delta_exceptions_map_to_fabric_cli_error( + self, mock_auth, mock_delta_table, error_cls + ): + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + mock_delta_table.side_effect = error_cls("error") + + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + + assert exc_info.value.status_code == fab_constant.ERROR_INVALID_DELTA_TABLE + assert "Failed to extract the table schema" in exc_info.value.message + + def test_invalid_json_maps_to_fabric_cli_error(self, mock_auth, mock_delta_table): + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + self._make_delta_table_mock(mock_delta_table, "invalid json {") + + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + + assert exc_info.value.status_code == fab_constant.ERROR_INVALID_DELTA_TABLE + + def test_missing_fields_key_maps_to_fabric_cli_error( + self, mock_auth, mock_delta_table + ): + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + self._make_delta_table_mock(mock_delta_table, json.dumps({"other": "value"})) + + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + + assert exc_info.value.status_code == fab_constant.ERROR_INVALID_DELTA_TABLE + + def test_fields_not_list_maps_to_fabric_cli_error( + self, mock_auth, mock_delta_table + ): + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + self._make_delta_table_mock( + mock_delta_table, json.dumps({"fields": "not a list"}) + ) + + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + + assert exc_info.value.status_code == fab_constant.ERROR_INVALID_DELTA_TABLE + + def test_complex_schema_field_contract(self, mock_auth, mock_delta_table): + """Lock the exact JSON shape returned for complex Delta types. + + delta-rs serialises Arrow → Delta-protocol JSON via Schema.to_json(). + The mapping below was validated against the installed deltalake wheel + and must be stable for users who pipe --output_format json into scripts. + + Verified mappings: + pyarrow int64 → "long" (NOT "integer") + pyarrow decimal128 → "decimal(10,2)" (compact string, NOT an object) + pyarrow timestamp('us') → "timestamp_ntz" (NOT "timestamp") + map / struct → nested objects with keyType/valueType/fields + """ + complex_schema_json = { + "type": "struct", + "fields": [ + {"name": "id", "type": "long", "nullable": False, "metadata": {}}, + { + "name": "price", + "type": "decimal(10,2)", + "nullable": True, + "metadata": {}, + }, + { + "name": "created_at", + "type": "timestamp_ntz", + "nullable": True, + "metadata": {}, + }, + { + "name": "tags", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": True, + }, + "nullable": True, + "metadata": {}, + }, + { + "name": "address", + "type": { + "type": "struct", + "fields": [ + { + "name": "street", + "type": "string", + "nullable": True, + "metadata": {}, + }, + { + "name": "city", + "type": "string", + "nullable": True, + "metadata": {}, + }, + ], + }, + "nullable": True, + "metadata": {}, + }, + ], + } + args = Namespace( + ws_id="ws", lakehouse_id="lh", table_local_path="Tables/complex_table" + ) + self._make_delta_table_mock(mock_delta_table, json.dumps(complex_schema_json)) + + fields = fab_tables_schema._get_table_schema(args) + + assert len(fields) == 5 + assert fields[0] == { + "name": "id", + "type": "long", + "nullable": False, + "metadata": {}, + } + assert fields[1] == { + "name": "price", + "type": "decimal(10,2)", + "nullable": True, + "metadata": {}, + } + assert fields[2] == { + "name": "created_at", + "type": "timestamp_ntz", + "nullable": True, + "metadata": {}, + } + assert fields[3] == { + "name": "tags", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": True, + }, + "nullable": True, + "metadata": {}, + } + assert fields[4] == { + "name": "address", + "type": { + "type": "struct", + "fields": [ + { + "name": "street", + "type": "string", + "nullable": True, + "metadata": {}, + }, + { + "name": "city", + "type": "string", + "nullable": True, + "metadata": {}, + }, + ], + }, + "nullable": True, + "metadata": {}, + } + + +class TestDeltaItemTypeValidation: + """Only item types with a Delta-compatible Tables/ folder are accepted.""" + + @pytest.fixture + def mock_auth(self): + with patch(f"{_DELTA_CLIENT}.FabAuth") as mock: + mock.return_value.get_access_token.return_value = "mock_token" + yield mock + + @pytest.fixture + def mock_delta_table(self): + with patch(f"{_DELTA_CLIENT}.DeltaTable") as mock: + schema = MagicMock() + schema.to_json.return_value = json.dumps({"fields": []}) + mock.return_value.schema.return_value = schema + yield mock + + @pytest.mark.parametrize( + "item_type", + [ + "Lakehouse", + "Warehouse", + "KQLDatabase", + "MirroredDatabase", + "SQLDatabase", + ], + ) + def test_supported_item_types_pass_validation( + self, mock_auth, mock_delta_table, item_type + ): + args = Namespace( + ws_id="ws", + lakehouse_id="lh", + table_local_path="Tables/t", + item_type=item_type, + ) + fab_tables_schema._get_table_schema(args) # must not raise + + def test_semantic_model_raises_clear_error(self, mock_auth, mock_delta_table): + args = Namespace( + ws_id="ws", + lakehouse_id="lh", + table_local_path="Tables/t", + item_type="SemanticModel", + ) + with pytest.raises(FabricCLIError) as exc_info: + fab_tables_schema._get_table_schema(args) + assert exc_info.value.status_code == fab_constant.ERROR_INVALID_ITEM_TYPE + assert "SemanticModel" in exc_info.value.message + assert "Delta" in exc_info.value.message + + def test_missing_item_type_does_not_raise(self, mock_auth, mock_delta_table): + """item_type may be absent when called directly in unit tests.""" + args = Namespace(ws_id="ws", lakehouse_id="lh", table_local_path="Tables/t") + fab_tables_schema._get_table_schema(args) # must not raise + + +class TestTablesSchemaCheckpointRegression: + """Regression for #228: schema must be readable when only a checkpoint exists. + + The old implementation walked _delta_log/*.json manually; after log compaction + those files are removed and only *.checkpoint.parquet + _last_checkpoint remain. + The new implementation delegates to DeltaTable.schema(), which uses delta-rs's + native reader that prefers checkpoints over JSON logs. + """ + + @pytest.fixture + def checkpointed_delta_table(self, tmp_path): + """Real local Delta table: checkpoint written, JSON commit log removed.""" + table_path = tmp_path / "test_table" + df = pa.table( + { + "id": pa.array([1, 2], pa.int64()), + "price": pa.array( + [Decimal("9.99"), Decimal("19.99")], pa.decimal128(10, 2) + ), + "created_at": pa.array([1_000_000, 2_000_000], pa.timestamp("us")), + } + ) + write_deltalake(str(table_path), df) + + dt = DeltaTable(str(table_path)) + dt.create_checkpoint() + + for json_log in (table_path / "_delta_log").glob("*.json"): + json_log.unlink() + + log_files = list((table_path / "_delta_log").iterdir()) + assert not any( + f.suffix == ".json" for f in log_files + ), "fixture must leave no JSON logs — only checkpoint parquet" + return table_path + + def test_schema_readable_after_log_compaction(self, checkpointed_delta_table): + """Schema extraction succeeds when only a checkpoint parquet file exists.""" + real_dt = DeltaTable(str(checkpointed_delta_table)) + args = Namespace( + ws_id="ws-id", lakehouse_id="lh-id", table_local_path="Tables/test_table" + ) + + with ( + patch(f"{_DELTA_CLIENT}.FabAuth") as mock_auth, + patch(f"{_DELTA_CLIENT}.DeltaTable", return_value=real_dt), + ): + mock_auth.return_value.get_access_token.return_value = "mock_token" + fields = fab_tables_schema._get_table_schema(args) + + assert [f["name"] for f in fields] == ["id", "price", "created_at"] + assert fields[0]["type"] == "long" + assert fields[1]["type"] == "decimal(10,2)" + assert fields[2]["type"] == "timestamp_ntz" diff --git a/tests/test_utils/test_fab_cmd_table_utils.py b/tests/test_utils/test_fab_cmd_table_utils.py new file mode 100644 index 000000000..4b7bf3bff --- /dev/null +++ b/tests/test_utils/test_fab_cmd_table_utils.py @@ -0,0 +1,53 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from argparse import Namespace +from unittest.mock import MagicMock + +from fabric_cli.core.hiearchy.fab_onelake_element import OneLakeItem +from fabric_cli.utils import fab_cmd_table_utils as utils_table + + +def _make_context(local_path: str) -> MagicMock: + context = MagicMock() + context.__class__ = OneLakeItem # make isinstance(context, OneLakeItem) pass + context.local_path = local_path + return context + + +class TestAddTablePropsToArgs: + """Unit tests for add_table_props_to_args path normalization.""" + + def test_shortcut_suffix_stripped_from_table_name(self): + """Regression: .Shortcut must not appear in args.table_name (used in REST URIs).""" + args = Namespace() + utils_table.add_table_props_to_args( + args, _make_context("Tables/my_table.Shortcut") + ) + assert args.table_name == "my_table" + + def test_shortcut_suffix_stripped_from_table_local_path(self): + """Regression: .Shortcut must not appear in args.table_local_path.""" + args = Namespace() + utils_table.add_table_props_to_args( + args, _make_context("Tables/my_table.Shortcut") + ) + assert args.table_local_path == "Tables/my_table" + + def test_shortcut_suffix_stripped_from_schema_qualified_path(self): + """Regression: .Shortcut must not appear in schema-qualified table_local_path.""" + args = Namespace() + utils_table.add_table_props_to_args( + args, _make_context("Tables/dbo/my_table.Shortcut") + ) + assert args.table_local_path == "Tables/dbo/my_table" + + def test_normal_path_unchanged(self): + args = Namespace() + utils_table.add_table_props_to_args(args, _make_context("Tables/my_table")) + assert args.table_local_path == "Tables/my_table" + + def test_schema_qualified_path_unchanged(self): + args = Namespace() + utils_table.add_table_props_to_args(args, _make_context("Tables/dbo/my_table")) + assert args.table_local_path == "Tables/dbo/my_table"