diff --git a/.github/bump_version.py b/.github/bump_version.py index 391c64dc..03c28d8d 100644 --- a/.github/bump_version.py +++ b/.github/bump_version.py @@ -153,12 +153,51 @@ def sync_release_manifest_versions(manifest_dir: Path, new_version: str): print(f" Updated {manifest_path}") +def sync_stack_versions(stack_path: Path, new_version: str): + if not stack_path.exists(): + return + text = stack_path.read_text() + replacements = [ + ( + r'(^stack_version\s*=\s*")([^"]+)(")', + rf"\g<1>{new_version}\g<3>", + ), + ( + r'(^policyengine_version\s*=\s*")([^"]+)(")', + rf"\g<1>{new_version}\g<3>", + ), + ( + r'(\[packages\.policyengine\]\s+name\s*=\s*"policyengine"\s+version\s*=\s*")([^"]+)(")', + rf"\g<1>{new_version}\g<3>", + ), + ] + updated = text + for pattern, replacement in replacements: + updated, count = re.subn( + pattern, + replacement, + updated, + count=1, + flags=re.MULTILINE, + ) + if count == 0: + print( + f"Could not update {stack_path}: missing stack version field.", + file=sys.stderr, + ) + sys.exit(1) + if updated != text: + stack_path.write_text(updated) + print(f" Updated {stack_path}") + + def main(): root = Path(__file__).resolve().parent.parent pyproject = root / "pyproject.toml" changelog = root / "CHANGELOG.md" changelog_dir = root / "changelog.d" manifest_dir = root / "src" / "policyengine" / "data" / "release_manifests" + stack_path = root / "policyengine-stack.toml" current = get_current_version(pyproject, changelog, root) bump = infer_bump(changelog_dir) @@ -168,6 +207,7 @@ def main(): update_file(pyproject, new) sync_release_manifest_versions(manifest_dir, new) + sync_stack_versions(stack_path, new) if __name__ == "__main__": diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 6afecf5a..9be31b2b 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -6,8 +6,11 @@ on: paths: - src/** - tests/** + - scripts/** - .github/** - changelog.d/** + - 
pyproject.toml + - policyengine-stack.toml workflow_dispatch: jobs: @@ -69,6 +72,25 @@ jobs: run: uv pip install --system . h5py - name: Smoke-import core modules run: python -c "import policyengine; from policyengine.core import Dataset, Policy, Simulation; from policyengine.outputs import aggregate, poverty, inequality; print('import OK')" + StackVerification: + name: Verify stack metadata + runs-on: ubuntu-latest + env: + POLICYENGINE_SKIP_COUNTRY_IMPORTS: "1" + steps: + - uses: actions/checkout@v6 + - name: Install uv + uses: astral-sh/setup-uv@v8.1.0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + - name: Check generated stack artifacts + run: python scripts/generate_stack_artifacts.py --check + - name: Install stack model extra + run: uv pip install -e ".[models]" --system + - name: Verify stack + run: policyengine stack verify --extra models --check-uris Test: runs-on: macos-latest strategy: diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 5ccaa6e2..bf7b350a 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -117,6 +117,8 @@ jobs: python-version: '3.13' - name: Build changelog run: pip install yaml-changelog towncrier && make changelog + - name: Generate stack artifacts + run: python scripts/generate_stack_artifacts.py - name: Preview changelog update run: ".github/get-changelog-diff.sh" - name: Install package for TRO regeneration @@ -125,7 +127,7 @@ jobs: env: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} run: python scripts/generate_trace_tros.py - - name: Update changelog and TROs + - name: Update changelog, stack metadata, and TROs uses: EndBug/add-and-commit@v9 with: add: "." 
@@ -154,6 +156,15 @@ jobs: run: ".github/publish-git-tag.sh" - name: Build package run: python -m build + - name: Export stack release assets + run: python scripts/export_stack_release_assets.py --dist-dir dist + - name: Verify stack release metadata + env: + POLICYENGINE_SKIP_COUNTRY_IMPORTS: "1" + run: | + VERSION=$(python .github/fetch_version.py) + policyengine stack verify --extra models --check-uris --json \ + > "dist/policyengine-stack-$VERSION.verification.json" - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: @@ -166,4 +177,8 @@ jobs: gh release create "$VERSION" \ --title "v$VERSION" \ --notes "See [CHANGELOG.md](https://github.com/PolicyEngine/policyengine.py/blob/main/CHANGELOG.md) for details." \ - --latest + --latest \ + "dist/policyengine-stack-$VERSION.json" \ + "dist/policyengine-stack-$VERSION.constraints.txt" \ + "dist/policyengine-stack-$VERSION.citation.txt" \ + "dist/policyengine-stack-$VERSION.verification.json" diff --git a/changelog.d/stack-system.added.md b/changelog.d/stack-system.added.md new file mode 100644 index 00000000..219422cd --- /dev/null +++ b/changelog.d/stack-system.added.md @@ -0,0 +1 @@ +Add a pip-native PolicyEngine stack manifest, generated extras, stack CLI, and fast stack verification workflow. diff --git a/docs/stacks.md b/docs/stacks.md new file mode 100644 index 00000000..a6046613 --- /dev/null +++ b/docs/stacks.md @@ -0,0 +1,41 @@ +# PolicyEngine stacks + +A PolicyEngine stack is the exact first-party package set certified for a +`policyengine` release. Installation is standard pip: + +```bash +pip install "policyengine[full]==4.19.1" +pip install "policyengine[us-full]==4.19.1" +pip install "policyengine[models]==4.19.1" +``` + +The stack source of truth is `policyengine-stack.toml`. 
Generated artifacts are: + +- `pyproject.toml` extras +- `src/policyengine/data/stack/manifest.json` +- GitHub release assets exported from the packaged manifest + +## Stack-only PRs + +Run: + +```bash +python scripts/prepare_stack_update.py \ + --core 3.27.0 \ + --us 1.730.0 \ + --uk 2.91.0 \ + --us-data 1.118.0 \ + --uk-data 1.45.0 +``` + +This updates stack metadata and creates a patch changelog fragment. Do not bump +the `policyengine` version manually in the PR; the existing release workflow +bumps the package and stack versions together after merge. + +CI checks generated artifacts, installs `.[models]`, and verifies the packaged +stack metadata with lightweight URI checks. Full data +package installation is available through `policyengine[full]`; this includes +both `policyengine-us-data` and `policyengine-uk-data` when their package +versions are installable for the target Python/platform. Dataset artifact +versions and release manifest URIs are recorded separately in the stack manifest +for citation and verification. 
diff --git a/policyengine-stack.toml b/policyengine-stack.toml new file mode 100644 index 00000000..4868d7b3 --- /dev/null +++ b/policyengine-stack.toml @@ -0,0 +1,72 @@ +schema_version = 1 +stack_version = "4.4.2" +policyengine_version = "4.4.2" + +[packages.policyengine] +name = "policyengine" +version = "4.4.2" +import_name = "policyengine" +role = "stack_carrier" + +[packages.policyengine-core] +name = "policyengine-core" +version = "3.26.1" +import_name = "policyengine_core" +role = "runtime_dependency" + +[packages.policyengine-us] +name = "policyengine-us" +version = "1.687.0" +import_name = "policyengine_us" +role = "country_model" +country = "us" + +[packages.policyengine-uk] +name = "policyengine-uk" +version = "2.88.14" +import_name = "policyengine_uk" +role = "country_model" +country = "uk" + +[packages.policyengine-us-data] +name = "policyengine-us-data" +version = "1.78.2" +import_name = "policyengine_us_data" +role = "country_data" +country = "us" +optional = true +markers = "python_version >= '3.12' and python_version < '3.15'" + +[packages.policyengine-uk-data] +name = "policyengine-uk-data" +version = "1.11.1" +import_name = "policyengine_uk_data" +role = "country_data" +country = "uk" +optional = true + +[extras] +models = ["policyengine-core", "policyengine-us", "policyengine-uk"] +data = ["policyengine-us-data", "policyengine-uk-data"] +full = ["policyengine-core", "policyengine-us", "policyengine-uk", "policyengine-us-data", "policyengine-uk-data"] +us = ["policyengine-core", "policyengine-us"] +uk = ["policyengine-core", "policyengine-uk"] +us-data = ["policyengine-us-data"] +uk-data = ["policyengine-uk-data"] +us-full = ["policyengine-core", "policyengine-us", "policyengine-us-data"] +uk-full = ["policyengine-core", "policyengine-uk", "policyengine-uk-data"] + +[countries.us] +model_package = "policyengine-us" +data_package = "policyengine-us-data" +default_dataset = "enhanced_cps_2024" +default_dataset_uri = 
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" +release_manifest_uri = "https://huggingface.co/policyengine/policyengine-us-data/resolve/9cb665df0a546f9c3d79b496f8eb2dd55859d38d/releases/1.78.2/release_manifest.json" + +[countries.uk] +model_package = "policyengine-uk" +data_package = "policyengine-uk-data" +data_artifact_version = "1.55.5" +default_dataset = "enhanced_frs_2023_24" +default_dataset_uri = "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.55.5" +release_manifest_uri = "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/1.55.5/release_manifest.json" diff --git a/pyproject.toml b/pyproject.toml index c7d0f6b9..e5f537d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,14 +40,46 @@ plotting = [ graph = [ "networkx>=3.0", ] -uk = [ - "policyengine_core>=3.26.0", +models = [ + "policyengine-core==3.26.1", + "policyengine-us==1.687.0", + "policyengine-uk==2.88.14", +] +data = [ + "policyengine-us-data==1.78.2; python_version >= '3.12' and python_version < '3.15'", + "policyengine-uk-data==1.11.1", +] +full = [ + "policyengine-core==3.26.1", + "policyengine-us==1.687.0", "policyengine-uk==2.88.14", + "policyengine-us-data==1.78.2; python_version >= '3.12' and python_version < '3.15'", + "policyengine-uk-data==1.11.1", ] us = [ - "policyengine_core>=3.26.0", + "policyengine-core==3.26.1", "policyengine-us==1.687.0", ] +uk = [ + "policyengine-core==3.26.1", + "policyengine-uk==2.88.14", +] +us-data = [ + "policyengine-us-data==1.78.2; python_version >= '3.12' and python_version < '3.15'", +] +uk-data = [ + "policyengine-uk-data==1.11.1", +] +us-full = [ + "policyengine-core==3.26.1", + "policyengine-us==1.687.0", + "policyengine-us-data==1.78.2; python_version >= '3.12' and python_version < '3.15'", +] +uk-full = [ + "policyengine-core==3.26.1", + "policyengine-uk==2.88.14", + "policyengine-uk-data==1.11.1", +] dev = [ "pytest", "furo", @@ -59,12 +91,12 @@ dev = [ "plotly>=5.0.0", 
"pytest-asyncio>=0.26.0", "ruff>=0.9.0", - "policyengine_core>=3.26.0", - "policyengine-uk==2.88.14", - "policyengine-us==1.687.0", "towncrier>=24.8.0", "mypy>=1.11.0", "pytest-cov>=5.0.0", + "policyengine-core==3.26.1", + "policyengine-us==1.687.0", + "policyengine-uk==2.88.14", ] [tool.setuptools] diff --git a/scripts/export_stack_release_assets.py b/scripts/export_stack_release_assets.py new file mode 100644 index 00000000..6e2ca374 --- /dev/null +++ b/scripts/export_stack_release_assets.py @@ -0,0 +1,54 @@ +"""Export stack metadata files for the GitHub release.""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +from generate_stack_artifacts import REPO_ROOT, STACK_MANIFEST + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("--dist-dir", type=Path, default=REPO_ROOT / "dist") + args = parser.parse_args() + + stack = json.loads(STACK_MANIFEST.read_text()) + version = stack["stack_version"] + args.dist_dir.mkdir(parents=True, exist_ok=True) + + manifest_path = args.dist_dir / f"policyengine-stack-{version}.json" + manifest_path.write_text(json.dumps(stack, indent=2, sort_keys=True) + "\n") + + constraints_path = args.dist_dir / f"policyengine-stack-{version}.constraints.txt" + full_packages = ["policyengine", *stack["extras"]["full"]] + constraints = [ + stack["packages"][package]["install_requirement"] for package in full_packages + ] + constraints_path.write_text("\n".join(constraints) + "\n") + + citation_path = args.dist_dir / f"policyengine-stack-{version}.citation.txt" + citation_path.write_text( + "\n".join( + [ + f"PolicyEngine stack {version}", + f"PolicyEngine package version: {stack['policyengine_version']}", + "Components:", + *( + f"- {component['name']} {component['version']}" + for _, component in sorted(stack["packages"].items()) + ), + ] + ) + + "\n" + ) + + print(manifest_path) + print(constraints_path) + print(citation_path) + return 0 + + +if __name__ == 
"__main__": + raise SystemExit(main()) diff --git a/scripts/generate_stack_artifacts.py b/scripts/generate_stack_artifacts.py new file mode 100644 index 00000000..213813be --- /dev/null +++ b/scripts/generate_stack_artifacts.py @@ -0,0 +1,168 @@ +"""Generate pip extras and packaged stack metadata from policyengine-stack.toml.""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any, Mapping + +try: + import tomllib +except ModuleNotFoundError: # pragma: no cover - for local Python 3.10 users. + import tomli as tomllib # type: ignore[no-redef] + +REPO_ROOT = Path(__file__).resolve().parents[1] +STACK_SOURCE = REPO_ROOT / "policyengine-stack.toml" +PYPROJECT = REPO_ROOT / "pyproject.toml" +STACK_MANIFEST = REPO_ROOT / "src" / "policyengine" / "data" / "stack" / "manifest.json" + +OPTIONAL_DEPENDENCIES_HEADER = "[project.optional-dependencies]" +NEXT_SECTION_PATTERN = re.compile(r"\n\[tool\.setuptools\]", re.MULTILINE) +def load_toml(path: Path) -> dict[str, Any]: + with path.open("rb") as stream: + return tomllib.load(stream) + + +def generated_manifest(stack: Mapping[str, Any]) -> dict[str, Any]: + packages = { + key: { + **value, + "install_requirement": exact_requirement(value), + } + for key, value in stack["packages"].items() + } + return { + "schema_version": int(stack["schema_version"]), + "stack_version": stack["stack_version"], + "policyengine_version": stack["policyengine_version"], + "source": "policyengine-stack.toml", + "packages": packages, + "extras": stack["extras"], + "countries": stack.get("countries", {}), + "citation": { + "title": f"PolicyEngine stack {stack['stack_version']}", + "version": stack["stack_version"], + "type": "software-stack", + "publisher": "PolicyEngine", + }, + } + + +def manifest_text(stack: Mapping[str, Any]) -> str: + return json.dumps(generated_manifest(stack), indent=2, sort_keys=True) + "\n" + + +def 
update_pyproject_text(pyproject_text: str, stack: Mapping[str, Any]) -> str: + pyproject = tomllib.loads(pyproject_text) + optional = pyproject["project"].get("optional-dependencies", {}) + stack_extras = stack["extras"] + + kept_extras: dict[str, list[str]] = {} + for name, dependencies in optional.items(): + if name == "dev" or name in stack_extras: + continue + kept_extras[name] = list(dependencies) + + generated_extras = { + name: [ + exact_requirement(stack["packages"][package_name]) + for package_name in package_names + ] + for name, package_names in stack_extras.items() + } + + first_party_package_names = { + normalized_requirement_name(component["name"]) + for component in stack["packages"].values() + } + dev_dependencies = [ + dependency + for dependency in optional.get("dev", []) + if normalized_requirement_name(dependency) not in first_party_package_names + ] + for package_name in stack_extras.get("models", []): + dev_dependencies.append(exact_requirement(stack["packages"][package_name])) + + replacement = format_optional_dependencies( + { + **kept_extras, + **generated_extras, + "dev": dev_dependencies, + } + ) + start = pyproject_text.index(OPTIONAL_DEPENDENCIES_HEADER) + next_section = NEXT_SECTION_PATTERN.search(pyproject_text, start) + if next_section is None: + raise ValueError("Could not find section after project.optional-dependencies.") + return ( + pyproject_text[:start] + + replacement + + pyproject_text[next_section.start() + 1 :] + ) + + +def exact_requirement(component: Mapping[str, Any]) -> str: + requirement = f"{component['name']}=={component['version']}" + markers = component.get("markers") + if markers: + requirement += f"; {markers}" + return requirement + + +def normalized_requirement_name(dependency: str) -> str: + match = re.match(r"\s*([A-Za-z0-9_.-]+)", dependency) + if match is None: + return "" + return match.group(1).replace("_", "-").lower() + + +def format_optional_dependencies(extras: Mapping[str, list[str]]) -> str: + 
lines = [OPTIONAL_DEPENDENCIES_HEADER] + for extra_name, dependencies in extras.items(): + lines.append(f"{extra_name} = [") + for dependency in dependencies: + lines.append(f' "{dependency}",') + lines.append("]") + return "\n".join(lines) + "\n\n" + + +def write_or_check(path: Path, content: str, *, check: bool) -> bool: + if path.exists() and path.read_text() == content: + return False + if check: + print(f"{path.relative_to(REPO_ROOT)} is not up to date.", file=sys.stderr) + return True + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + print(f"Updated {path.relative_to(REPO_ROOT)}") + return True + + +def generate(*, check: bool = False) -> int: + stack = load_toml(STACK_SOURCE) + changed = False + changed |= write_or_check(STACK_MANIFEST, manifest_text(stack), check=check) + changed |= write_or_check( + PYPROJECT, + update_pyproject_text(PYPROJECT.read_text(), stack), + check=check, + ) + return 1 if check and changed else 0 + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + "--check", + action="store_true", + help="Fail if generated files are not up to date.", + ) + args = parser.parse_args() + return generate(check=args.check) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/prepare_stack_update.py b/scripts/prepare_stack_update.py new file mode 100644 index 00000000..a7357c6f --- /dev/null +++ b/scripts/prepare_stack_update.py @@ -0,0 +1,95 @@ +"""Prepare a PR that only updates the pip-native PolicyEngine stack.""" + +from __future__ import annotations + +import argparse +from typing import Any, Mapping + +from generate_stack_artifacts import REPO_ROOT, STACK_SOURCE, generate, load_toml + +PACKAGE_ARGS = { + "core": "policyengine-core", + "us": "policyengine-us", + "uk": "policyengine-uk", + "us_data": "policyengine-us-data", + "uk_data": "policyengine-uk-data", +} + + +def write_stack_source(stack: Mapping[str, Any]) -> None: + lines: list[str] = [ + 
f"schema_version = {stack['schema_version']}", + f'stack_version = "{stack["stack_version"]}"', + f'policyengine_version = "{stack["policyengine_version"]}"', + "", + ] + for package_key, package in stack["packages"].items(): + lines.append(f"[packages.{package_key}]") + for key, value in package.items(): + lines.append(_toml_assignment(key, value)) + lines.append("") + lines.append("[extras]") + for extra, packages in stack["extras"].items(): + values = ", ".join(f'"{package}"' for package in packages) + lines.append(f"{extra} = [{values}]") + lines.append("") + for country, metadata in stack.get("countries", {}).items(): + lines.append(f"[countries.{country}]") + for key, value in metadata.items(): + lines.append(_toml_assignment(key, value)) + lines.append("") + STACK_SOURCE.write_text("\n".join(lines).rstrip() + "\n") + print(f"Updated {STACK_SOURCE.relative_to(REPO_ROOT)}") + + +def _toml_assignment(key: str, value: Any) -> str: + if isinstance(value, bool): + return f"{key} = {str(value).lower()}" + return f'{key} = "{value}"' + + +def write_changelog(message: str, fragment_name: str) -> None: + changelog_dir = REPO_ROOT / "changelog.d" + changelog_dir.mkdir(exist_ok=True) + path = changelog_dir / fragment_name + path.write_text(message.strip() + "\n") + print(f"Updated {path.relative_to(REPO_ROOT)}") + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Prepare a stack-only PolicyEngine PR." 
+ ) + for arg_name, package_key in PACKAGE_ARGS.items(): + parser.add_argument( + f"--{arg_name.replace('_', '-')}", + dest=arg_name, + help=f"Exact version for {package_key}.", + ) + parser.add_argument( + "--changelog", + default="Update the certified PolicyEngine stack pins.", + help="Patch changelog text to include with the stack update.", + ) + parser.add_argument( + "--fragment-name", + default="stack-update.fixed.md", + help="Changelog fragment filename under changelog.d/.", + ) + args = parser.parse_args() + + stack = load_toml(STACK_SOURCE) + packages = {key: dict(value) for key, value in stack["packages"].items()} + stack = {**stack, "packages": packages} + for arg_name, package_key in PACKAGE_ARGS.items(): + version = getattr(args, arg_name) + if version: + packages[package_key]["version"] = version + write_stack_source(stack) + generate(check=False) + write_changelog(args.changelog, args.fragment_name) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/policyengine/__init__.py b/src/policyengine/__init__.py index a8de3971..6948ae81 100644 --- a/src/policyengine/__init__.py +++ b/src/policyengine/__init__.py @@ -28,17 +28,20 @@ (the pinned ``TaxBenefitModelVersion``), and the microsim helpers. 
""" +import os from importlib.util import find_spec from policyengine import outputs as outputs from policyengine.core import Simulation as Simulation -if find_spec("policyengine_us") is not None: +_skip_country_imports = os.environ.get("POLICYENGINE_SKIP_COUNTRY_IMPORTS") == "1" + +if not _skip_country_imports and find_spec("policyengine_us") is not None: from policyengine.tax_benefit_models import us as us else: # pragma: no cover us = None # type: ignore[assignment] -if find_spec("policyengine_uk") is not None: +if not _skip_country_imports and find_spec("policyengine_uk") is not None: from policyengine.tax_benefit_models import uk as uk else: # pragma: no cover uk = None # type: ignore[assignment] diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py index a282c718..f8eb2499 100644 --- a/src/policyengine/cli.py +++ b/src/policyengine/cli.py @@ -27,6 +27,11 @@ build_trace_tro_from_release_bundle, serialize_trace_tro, ) +from policyengine.stack import ( + format_stack_citation, + get_current_stack, + verify_installed_stack, +) def _parser() -> argparse.ArgumentParser: @@ -65,6 +70,53 @@ def _parser() -> argparse.ArgumentParser: ) bundle.add_argument("country", help="Country id (e.g. us, uk).") + stack = subparsers.add_parser( + "stack", + help="Inspect or verify the packaged PolicyEngine stack manifest.", + ) + stack_subparsers = stack.add_subparsers(dest="stack_command", required=True) + + stack_show = stack_subparsers.add_parser( + "show", + help="Print the packaged stack manifest as JSON.", + ) + stack_show.add_argument( + "--extra", + help="Only include package components used by this extra.", + ) + + stack_verify = stack_subparsers.add_parser( + "verify", + help="Verify installed packages against the packaged stack manifest.", + ) + stack_verify.add_argument( + "--extra", + help=( + "Require every component in this extra, e.g. models or full. " + "Without this, missing optional components are skipped." 
+ ), + ) + stack_verify.add_argument( + "--no-imports", + action="store_true", + help="Check installed versions without importing component modules.", + ) + stack_verify.add_argument( + "--check-uris", + action="store_true", + help="Perform lightweight HEAD/GET checks for stack metadata URIs.", + ) + stack_verify.add_argument( + "--json", + action="store_true", + help="Print the full verification report as JSON.", + ) + + stack_subparsers.add_parser( + "cite", + help="Print a concise citation for the packaged stack.", + ) + return parser @@ -119,6 +171,41 @@ def _emit_release_manifest(country_id: str) -> int: return 0 +def _emit_stack(extra: Optional[str]) -> int: + stack = get_current_stack() + if extra is not None: + package_names = {"policyengine", *stack["extras"][extra]} + stack = { + **stack, + "packages": { + name: package + for name, package in stack["packages"].items() + if name in package_names + }, + } + print(json.dumps(stack, indent=2, sort_keys=True)) + return 0 + + +def _verify_stack(args: argparse.Namespace) -> int: + report = verify_installed_stack( + extra=args.extra, + check_imports=not args.no_imports, + check_uris=args.check_uris, + ) + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + status = "ok" if report["passed"] else "failed" + print(f"PolicyEngine stack {report['stack_version']}: {status}") + for check in report["checks"]: + label = check.get("component") or check.get("uri") + message = check.get("message") + suffix = f" ({message})" if message else "" + print(f"- {label}: {check['status']}{suffix}") + return 0 if report["passed"] else 1 + + def main(argv: Optional[Sequence[str]] = None) -> int: args = _parser().parse_args(argv) if args.command == "trace-tro": @@ -127,6 +214,14 @@ def main(argv: Optional[Sequence[str]] = None) -> int: return _validate_tro(args.path) if args.command == "release-manifest": return _emit_release_manifest(args.country) + if args.command == "stack": + if args.stack_command == 
"show": + return _emit_stack(args.extra) + if args.stack_command == "verify": + return _verify_stack(args) + if args.stack_command == "cite": + print(format_stack_citation()) + return 0 return 1 diff --git a/src/policyengine/data/stack/manifest.json b/src/policyengine/data/stack/manifest.json new file mode 100644 index 00000000..2faa1168 --- /dev/null +++ b/src/policyengine/data/stack/manifest.json @@ -0,0 +1,122 @@ +{ + "citation": { + "publisher": "PolicyEngine", + "title": "PolicyEngine stack 4.4.2", + "type": "software-stack", + "version": "4.4.2" + }, + "countries": { + "uk": { + "data_artifact_version": "1.55.5", + "data_package": "policyengine-uk-data", + "default_dataset": "enhanced_frs_2023_24", + "default_dataset_uri": "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.55.5", + "model_package": "policyengine-uk", + "release_manifest_uri": "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/1.55.5/release_manifest.json" + }, + "us": { + "data_package": "policyengine-us-data", + "default_dataset": "enhanced_cps_2024", + "default_dataset_uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2", + "model_package": "policyengine-us", + "release_manifest_uri": "https://huggingface.co/policyengine/policyengine-us-data/resolve/9cb665df0a546f9c3d79b496f8eb2dd55859d38d/releases/1.78.2/release_manifest.json" + } + }, + "extras": { + "data": [ + "policyengine-us-data", + "policyengine-uk-data" + ], + "full": [ + "policyengine-core", + "policyengine-us", + "policyengine-uk", + "policyengine-us-data", + "policyengine-uk-data" + ], + "models": [ + "policyengine-core", + "policyengine-us", + "policyengine-uk" + ], + "uk": [ + "policyengine-core", + "policyengine-uk" + ], + "uk-data": [ + "policyengine-uk-data" + ], + "uk-full": [ + "policyengine-core", + "policyengine-uk", + "policyengine-uk-data" + ], + "us": [ + "policyengine-core", + "policyengine-us" + ], + "us-data": [ + "policyengine-us-data" + ], + 
"us-full": [ + "policyengine-core", + "policyengine-us", + "policyengine-us-data" + ] + }, + "packages": { + "policyengine": { + "import_name": "policyengine", + "install_requirement": "policyengine==4.4.2", + "name": "policyengine", + "role": "stack_carrier", + "version": "4.4.2" + }, + "policyengine-core": { + "import_name": "policyengine_core", + "install_requirement": "policyengine-core==3.26.1", + "name": "policyengine-core", + "role": "runtime_dependency", + "version": "3.26.1" + }, + "policyengine-uk": { + "country": "uk", + "import_name": "policyengine_uk", + "install_requirement": "policyengine-uk==2.88.14", + "name": "policyengine-uk", + "role": "country_model", + "version": "2.88.14" + }, + "policyengine-uk-data": { + "country": "uk", + "import_name": "policyengine_uk_data", + "install_requirement": "policyengine-uk-data==1.11.1", + "name": "policyengine-uk-data", + "optional": true, + "role": "country_data", + "version": "1.11.1" + }, + "policyengine-us": { + "country": "us", + "import_name": "policyengine_us", + "install_requirement": "policyengine-us==1.687.0", + "name": "policyengine-us", + "role": "country_model", + "version": "1.687.0" + }, + "policyengine-us-data": { + "country": "us", + "import_name": "policyengine_us_data", + "install_requirement": "policyengine-us-data==1.78.2; python_version >= '3.12' and python_version < '3.15'", + "markers": "python_version >= '3.12' and python_version < '3.15'", + "name": "policyengine-us-data", + "optional": true, + "role": "country_data", + "version": "1.78.2" + } + }, + "policyengine_version": "4.4.2", + "schema_version": 1, + "source": "policyengine-stack.toml", + "stack_version": "4.4.2" +} diff --git a/src/policyengine/stack.py b/src/policyengine/stack.py new file mode 100644 index 00000000..abbb178c --- /dev/null +++ b/src/policyengine/stack.py @@ -0,0 +1,224 @@ +"""PolicyEngine stack manifest inspection and verification. 
+ +The stack manifest is the pip-native replacement for release bundles: it names +the exact first-party package set certified for a ``policyengine`` release. +Installation remains standard pip extras; this module only reports and verifies +what is installed. +""" + +from __future__ import annotations + +import json +from functools import lru_cache +from importlib import metadata +from importlib.resources import files +from importlib.util import find_spec +from typing import Any, Mapping, Optional + +import requests + +STACK_MANIFEST_RESOURCE = ("data", "stack", "manifest.json") +URI_CHECK_TIMEOUT_SECONDS = 5 + + +class StackError(ValueError): + """Raised when stack metadata is missing or inconsistent.""" + + +def _stack_resource_path(): + path = files("policyengine") + for part in STACK_MANIFEST_RESOURCE: + path = path.joinpath(part) + return path + + +@lru_cache +def get_current_stack() -> dict[str, Any]: + """Return the stack manifest packaged with this ``policyengine`` wheel.""" + resource = _stack_resource_path() + try: + return json.loads(resource.read_text()) + except FileNotFoundError as exc: + raise StackError("No packaged PolicyEngine stack manifest found.") from exc + + +def get_component(name: str) -> dict[str, Any]: + """Return one component from the current stack manifest.""" + components = get_current_stack().get("packages", {}) + key = _component_key(name) + try: + return components[key] + except KeyError as exc: + raise StackError(f"No stack component named {name!r}.") from exc + + +def get_extra(name: str) -> list[str]: + """Return the component names included by a pip extra.""" + extras = get_current_stack().get("extras", {}) + try: + return list(extras[name]) + except KeyError as exc: + raise StackError(f"No stack extra named {name!r}.") from exc + + +def stack_install_requirements(extra: str = "full") -> list[str]: + """Return exact pip requirements for a stack extra.""" + stack = get_current_stack() + requirements = 
[f"policyengine=={stack['policyengine_version']}"] + for component_name in get_extra(extra): + component = get_component(component_name) + requirements.append(component["install_requirement"]) + return requirements + + +def verify_installed_stack( + *, + extra: Optional[str] = None, + check_imports: bool = True, + check_uris: bool = False, + uri_timeout_seconds: int = URI_CHECK_TIMEOUT_SECONDS, +) -> dict[str, Any]: + """Verify installed packages against the packaged stack manifest. + + Without ``extra``, this verifies ``policyengine`` plus every stack component + that is already installed and marks missing optional components as skipped. + With ``extra``, all components in that extra are required. + """ + stack = get_current_stack() + required = _required_components(stack, extra) + component_checks = [ + _verify_component( + key, + component, + required=key in required, + check_imports=check_imports, + ) + for key, component in stack.get("packages", {}).items() + if key in required or extra is None + ] + uri_checks = ( + _verify_uris(stack.get("countries", {}), timeout=uri_timeout_seconds) + if check_uris + else [] + ) + checks: list[Mapping[str, Any]] = [*component_checks, *uri_checks] + passed = all(check["status"] in {"ok", "skipped"} for check in checks) + return { + "schema_version": 1, + "stack_version": stack.get("stack_version"), + "policyengine_version": stack.get("policyengine_version"), + "extra": extra, + "passed": passed, + "checks": checks, + } + + +def format_stack_citation() -> str: + """Return a concise human-readable citation for the current stack.""" + stack = get_current_stack() + package_lines = [ + f"- {component['name']} {component['version']}" + for _, component in sorted(stack.get("packages", {}).items()) + ] + return "\n".join( + [ + f"PolicyEngine stack {stack['stack_version']}", + f"PolicyEngine package version: {stack['policyengine_version']}", + "Components:", + *package_lines, + ] + ) + + +def _component_key(name: str) -> str: + 
def _verify_component(
    key: str,
    component: Mapping[str, Any],
    *,
    required: bool,
    check_imports: bool,
) -> dict[str, Any]:
    """Check one manifest component against the installed distribution.

    Args:
        key: Manifest key of the component, echoed back in the result.
        component: Manifest entry; ``name`` and ``version`` are read, and
            ``import_name`` is used when present.
        required: Whether an absent distribution counts as ``missing`` rather
            than ``skipped``.
        check_imports: Whether ``import_name`` must be discoverable.

    Returns:
        A check dictionary whose ``status`` is one of ``ok``, ``missing``,
        ``skipped``, ``mismatch`` or ``import_error``.
    """
    package_name = str(component["name"])
    expected_version = str(component["version"])
    check: dict[str, Any] = {
        "kind": "component",
        "component": key,
        "package": package_name,
        "expected_version": expected_version,
    }
    try:
        installed_version = metadata.version(package_name)
    except metadata.PackageNotFoundError:
        check["status"] = "missing" if required else "skipped"
        check["message"] = "Package is not installed."
        return check

    check["installed_version"] = installed_version
    if installed_version != expected_version:
        check["status"] = "mismatch"
        check["message"] = "Installed version does not match stack pin."
        return check

    import_name = component.get("import_name")
    if check_imports and import_name:
        try:
            spec = find_spec(str(import_name))
        except (ImportError, ValueError):
            # find_spec imports the parent package of a dotted name and raises
            # ModuleNotFoundError when it is absent; it raises ValueError when
            # an already-loaded module has no __spec__. Both mean the module
            # cannot be located, so report them rather than crash.
            spec = None
        if spec is None:
            check["status"] = "import_error"
            check["message"] = f"Import module {import_name!r} is not discoverable."
            return check

    check["status"] = "ok"
    return check
def test_verify_installed_stack_reports_version_mismatch(monkeypatch):
    """A wrong ``policyengine-us`` version must fail verification as a mismatch."""
    packages = stack.get_current_stack()["packages"]

    # Pretend every component is installed at its pinned version...
    installed = {}
    for entry in packages.values():
        installed[entry["name"]] = entry["version"]
    # ...except the US model, which reports a version the stack never pins.
    installed["policyengine-us"] = "0.0.0"

    def fake_version(name):
        return installed[name]

    monkeypatch.setattr(stack.metadata, "version", fake_version)

    result = stack.verify_installed_stack(extra="us", check_imports=False)

    assert result["passed"] is False
    us_checks = (
        entry for entry in result["checks"] if entry.get("package") == "policyengine-us"
    )
    us_check = next(us_checks)
    assert us_check["status"] == "mismatch"
def test_unknown_extra_is_named():
    """Asking for an extra the manifest does not define raises ``StackError``."""
    expected_error = stack.StackError
    with pytest.raises(expected_error, match="No stack extra"):
        stack.get_extra("ghost")