PolicyEngine · vahid-ahmadi · Jun 18, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 18, 2026
diff --git a/changelog.d/431.md b/changelog.d/431.md
@@ -0,0 +1 @@
+- Calibrate `bus_fare_spending` and `bus_subsidy_spending` to DfT Annual Bus Statistics (year ending March 2025, England) totals — passenger fare receipts £3.4bn (table BUS05aii) and net government support £3.0bn (table BUS05bii), uplifted England→UK by population — via post-calibration scaling steps mirroring the rail subsidy calibration. Without anchoring, imputed bus fare inherited the broader transport-consumption over-estimate (~£10bn, ~3× too high) and bus subsidy drifted low (~£1.5bn). Adds tests asserting both bus totals match the DfT targets within 20% in the built dataset (skipped in PR CI where no dataset is built, active on the post-merge build, like test_energy_calibration).
diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py
@@ -292,20 +292,30 @@ def main():
             update_dataset("Calibrate public service aggregates", "processing")
             from policyengine_uk_data.datasets.imputations.services.services import (
                 calibrate_rail_subsidy_spending,
+                calibrate_bus_subsidy_spending,
             )
 
             calibrate_rail_subsidy_spending(
                 frs_calibrated,
                 frs_release.calibration_year,
             )
+            calibrate_bus_subsidy_spending(
+                frs_calibrated,
+                frs_release.calibration_year,
+            )
             update_dataset("Calibrate public service aggregates", "completed")
 
             update_dataset("Calibrate fuel litres", "processing")
             from policyengine_uk_data.datasets.imputations.consumption import (
                 calibrate_dataset_fuel_litre_proxies_to_road_fuel,
+                calibrate_bus_fare_spending,
             )
 
             calibrate_dataset_fuel_litre_proxies_to_road_fuel(frs_calibrated)
+            calibrate_bus_fare_spending(
+                frs_calibrated,
+                frs_release.calibration_year,
+            )
             update_dataset("Calibrate fuel litres", "completed")
 
             update_dataset("Save final dataset", "processing")

diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py
@@ -781,6 +781,51 @@ def calibrate_dataset_fuel_litre_proxies_to_road_fuel(
     )
 
 
+# England → UK uplift for England-only DfT bus figures: ONS mid-2023 population
+# ratio (UK 68.3M / England 57.7M ≈ 1.18), a best approximation since DfT
+# publishes no single GB/UK bus-finance total. Indicative — bus use per head
+# varies by nation. https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
+ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18
+
+BUS_FARE_TARGETS = {
+    # DfT Annual Bus Statistics, year ending March 2025 (England), table
+    # BUS05aii: passenger fare receipts on local bus services were GBP 3.4bn
+    # (52% of GBP 6.6bn total operating revenue), uplifted England → UK by
+    # population (≈ GBP 4.0bn UK). Without anchoring, the imputed aggregate
+    # inherits the broader transport-consumption over-estimate (~GBP 10bn).
+    # https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
+    2025: 3.4e9 * ENGLAND_TO_UK_POPULATION_UPLIFT,
+}
+
+
+def calibrate_bus_fare_spending(
+    dataset: UKSingleYearDataset,
+    time_period: int,
+) -> float | None:
+    """Scale bus_fare_spending to the DfT passenger-fare total (BUS_FARE_TARGETS)."""
+    target = BUS_FARE_TARGETS.get(time_period)
+    if target is None:
+        return None
+
+    original_time_period = dataset.time_period
+    dataset.time_period = str(original_time_period)
+    try:
+        simulation = Microsimulation(dataset=dataset)
+        actual = simulation.calculate(
+            "bus_fare_spending",
+            period=time_period,
+            map_to="household",
+        ).sum()
+    finally:
+        dataset.time_period = original_time_period
+    if actual <= 0:
+        raise ValueError(f"Cannot calibrate bus_fare_spending: aggregate is {actual}.")
+
+    scale = target / actual
+    dataset.household["bus_fare_spending"] *= scale
+    return scale
+
+
 def save_imputation_models():
     from policyengine_uk_data.utils.qrf import QRF
 

diff --git a/policyengine_uk_data/datasets/imputations/services/services.py b/policyengine_uk_data/datasets/imputations/services/services.py
@@ -21,6 +21,24 @@
     2025: 21.6e9,
 }
 
+# England → UK uplift for England-only DfT bus figures. DfT publishes no single
+# GB/UK bus-finance total, so we scale by the ONS mid-2023 population ratio
+# (UK 68.3M / England 57.7M ≈ 1.18) as a best approximation. This is indicative:
+# bus use per head varies by nation (London lifts England's per-capita use), so
+# the true UK factor is likely a little below the population ratio.
+# ONS mid-year population estimates:
+# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
+ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18
+
+BUS_SUBSIDY_TARGETS = {
+    # DfT Annual Bus Statistics, year ending March 2025 (England), table
+    # BUS05bii: total net government support for local bus services was
+    # GBP 3.0bn (of which GBP 0.8bn concessionary travel reimbursement),
+    # uplifted England → UK by population (≈ GBP 3.5bn UK).
+    # https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
+    2025: 3.0e9 * ENGLAND_TO_UK_POPULATION_UPLIFT,
+}
+
 
 def get_fare_index_survey_year() -> float:
     """
@@ -66,6 +84,36 @@ def calibrate_rail_subsidy_spending(
     return scale
 
 
+def calibrate_bus_subsidy_spending(
+    dataset: UKSingleYearDataset,
+    time_period: int,
+) -> float | None:
+    """Scale bus_subsidy_spending to the DfT net-support total (BUS_SUBSIDY_TARGETS)."""
+    target = BUS_SUBSIDY_TARGETS.get(time_period)
+    if target is None:
+        return None
+
+    original_time_period = dataset.time_period
+    dataset.time_period = str(original_time_period)
+    try:
+        simulation = Microsimulation(dataset=dataset)
+        actual = simulation.calculate(
+            "bus_subsidy_spending",
+            period=time_period,
+            map_to="household",
+        ).sum()
+    finally:
+        dataset.time_period = original_time_period
+    if actual <= 0:
+        raise ValueError(
+            f"Cannot calibrate bus_subsidy_spending: aggregate is {actual}."
+        )
+
+    scale = target / actual
+    dataset.household["bus_subsidy_spending"] *= scale
+    return scale
+
+
 def impute_services(
     dataset: UKSingleYearDataset,
 ) -> UKSingleYearDataset:

diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py
@@ -6,15 +6,15 @@
     # ORR/GOV.UK rail finance statistics report GBP 21.6bn of government
     # support to the rail industry in 2024-25.
     "rail_subsidy_spending": 21.6e9,
-    # Approximate public support for local bus services; kept as a loose
-    # smoke-test target because source coverage and dataset coverage differ.
-    "bus_subsidy_spending": 2.5e9,
-    # DfT Annual Bus Statistics (year ending March 2025) report GBP 3.4bn
-    # passenger fare receipts for local bus services in England. The LCFS input
-    # is UK household bus/coach fare spending, so this is an order-of-magnitude
-    # target. Enable once a dataset built with the bus_fare_spending imputation
-    # is published — the column is absent from the currently-released dataset.
-    # "bus_fare_spending": 3.4e9,
+    # DfT Annual Bus Statistics (year ending March 2025, England), table
+    # BUS05bii: net government support ~GBP 3.0bn, uplifted England→UK by
+    # population (~GBP 3.5bn). bus_subsidy_spending is calibrated to this.
+    "bus_subsidy_spending": 3.0e9 * 68.3 / 57.7,
+    # DfT Annual Bus Statistics (year ending March 2025, England), table
+    # BUS05aii: passenger fare receipts ~GBP 3.4bn, uplifted England→UK by
+    # population (~GBP 4.0bn). bus_fare_spending is calibrated to this. Enable
+    # once a dataset built with that calibration is published (released predates).
+    # "bus_fare_spending": 3.4e9 * 68.3 / 57.7,
 }
 
 

diff --git a/policyengine_uk_data/tests/test_bus_fare_spending_in_dataset.py b/policyengine_uk_data/tests/test_bus_fare_spending_in_dataset.py
@@ -0,0 +1,30 @@
+"""Bus fare / subsidy totals in the built dataset must match the DfT targets.
+
+These use the enhanced FRS dataset, which is produced by ``make data`` (the
+build / push CI / local generation) and is *not* fetched by ``make download``.
+So the `baseline` fixture skips them in PR CI (no built dataset) and runs them
+after a build, against the freshly calibrated data — the same pattern as
+test_energy_calibration. Both bus variables are calibrated to the official DfT
+totals in the build, so the totals should match closely; a 20% band is allowed.
+"""
+
+import pytest
+
+# DfT Annual Bus Statistics, year ending March 2025 (England), uplifted
+# England -> UK by ONS mid-2023 population (x 68.3 / 57.7):
+#   bus_fare_spending    -> BUS05aii passenger fare receipts £3.4bn (~£4.0bn UK)
+#   bus_subsidy_spending -> BUS05bii net government support  £3.0bn (~£3.5bn UK)
+# https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
+BUS_TARGETS = {
+    "bus_fare_spending": 3.4e9 * 68.3 / 57.7,
+    "bus_subsidy_spending": 3.0e9 * 68.3 / 57.7,
+}
+
+
+@pytest.mark.parametrize("variable,target", sorted(BUS_TARGETS.items()))
+def test_bus_total_matches_dft_target(baseline, variable: str, target: float):
+    total = baseline.calculate(variable, map_to="household", period=2025).sum()
+    assert abs(total / target - 1) < 0.2, (
+        f"{variable}: £{total / 1e9:.2f}bn vs DfT target £{target / 1e9:.2f}bn "
+        f"(relative error {abs(total / target - 1):.1%})."
+    )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		- Calibrate `bus_fare_spending` and `bus_subsidy_spending` to DfT Annual Bus Statistics (year ending March 2025, England) totals — passenger fare receipts £3.4bn (table BUS05aii) and net government support £3.0bn (table BUS05bii), uplifted England→UK by population — via post-calibration scaling steps mirroring the rail subsidy calibration. Without anchoring, imputed bus fare inherited the broader transport-consumption over-estimate (~£10bn, ~3× too high) and bus subsidy drifted low (~£1.5bn). Adds tests asserting both bus totals match the DfT targets within 20% in the built dataset (skipped in PR CI where no dataset is built, active on the post-merge build, like test_energy_calibration).