diff --git a/changelog.d/431.md b/changelog.d/431.md new file mode 100644 index 00000000..a8180a96 --- /dev/null +++ b/changelog.d/431.md @@ -0,0 +1 @@ +- Calibrate `bus_fare_spending` and `bus_subsidy_spending` to DfT Annual Bus Statistics (year ending March 2025, England) totals — passenger fare receipts £3.4bn (table BUS05aii) and net government support £3.0bn (table BUS05bii), uplifted England→UK by population — via post-calibration scaling steps mirroring the rail subsidy calibration. Without anchoring, imputed bus fare inherited the broader transport-consumption over-estimate (~£10bn, ~3× too high) and bus subsidy drifted low (~£1.5bn). Adds tests asserting both bus totals match the DfT targets within 20% in the built dataset (skipped in PR CI where no dataset is built, active on the post-merge build, like test_energy_calibration). diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index 391896db..bebdc5d7 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -292,20 +292,30 @@ def main(): update_dataset("Calibrate public service aggregates", "processing") from policyengine_uk_data.datasets.imputations.services.services import ( calibrate_rail_subsidy_spending, + calibrate_bus_subsidy_spending, ) calibrate_rail_subsidy_spending( frs_calibrated, frs_release.calibration_year, ) + calibrate_bus_subsidy_spending( + frs_calibrated, + frs_release.calibration_year, + ) update_dataset("Calibrate public service aggregates", "completed") update_dataset("Calibrate fuel litres", "processing") from policyengine_uk_data.datasets.imputations.consumption import ( calibrate_dataset_fuel_litre_proxies_to_road_fuel, + calibrate_bus_fare_spending, ) calibrate_dataset_fuel_litre_proxies_to_road_fuel(frs_calibrated) + calibrate_bus_fare_spending( + frs_calibrated, + frs_release.calibration_year, + ) update_dataset("Calibrate fuel litres", "completed") 
# England → UK uplift for England-only DfT bus figures: ONS mid-2023 population
# ratio (UK 68.3M / England 57.7M ≈ 1.18), a best approximation since DfT
# publishes no single GB/UK bus-finance total. Indicative — bus use per head
# varies by nation.
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18

# Calibration targets for aggregate bus_fare_spending, keyed by year (GBP).
BUS_FARE_TARGETS = {
    # DfT Annual Bus Statistics, year ending March 2025 (England), table
    # BUS05aii: passenger fare receipts on local bus services were GBP 3.4bn
    # (52% of GBP 6.6bn total operating revenue), uplifted England → UK by
    # population (≈ GBP 4.0bn UK). Without anchoring, the imputed aggregate
    # inherits the broader transport-consumption over-estimate (~GBP 10bn).
    # https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
    2025: 3.4e9 * ENGLAND_TO_UK_POPULATION_UPLIFT,
}


def calibrate_bus_fare_spending(
    dataset: UKSingleYearDataset,
    time_period: int,
) -> float | None:
    """Scale ``bus_fare_spending`` to the DfT passenger-fare total.

    Looks up the target for ``time_period`` in ``BUS_FARE_TARGETS``, computes
    the current household-level aggregate with a ``Microsimulation`` built on
    ``dataset``, and multiplies ``dataset.household["bus_fare_spending"]`` in
    place by ``target / aggregate``.

    Args:
        dataset: Dataset whose ``household`` table is rescaled in place.
        time_period: Year used both as the key into ``BUS_FARE_TARGETS`` and
            as the calculation period.

    Returns:
        The scale factor that was applied, or ``None`` when there is no
        target for ``time_period`` (the dataset is then left untouched).

    Raises:
        ValueError: If the simulated aggregate is not a positive, finite
            number, because no meaningful scale factor exists in that case.
    """
    target = BUS_FARE_TARGETS.get(time_period)
    if target is None:
        return None

    original_time_period = dataset.time_period
    # The simulation is built while the dataset carries a string time period
    # (presumably what Microsimulation expects — confirm); the original value
    # is restored even if the simulation raises.
    dataset.time_period = str(original_time_period)
    try:
        simulation = Microsimulation(dataset=dataset)
        actual = simulation.calculate(
            "bus_fare_spending",
            period=time_period,
            map_to="household",
        ).sum()
    finally:
        dataset.time_period = original_time_period

    # A plain `actual <= 0` check lets NaN through (every comparison with NaN
    # is False), which would overwrite the whole column with NaN; an infinite
    # aggregate would likewise zero it. Require a positive, finite aggregate.
    if not 0 < actual < float("inf"):
        raise ValueError(f"Cannot calibrate bus_fare_spending: aggregate is {actual}.")

    scale = target / actual
    dataset.household["bus_fare_spending"] *= scale
    return scale
# ONS mid-year population estimates:
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
# Ratio of the mid-2023 UK population (68.3M) to England's (57.7M), used to
# uplift England-only DfT bus figures to an approximate UK total.
ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18

# Calibration targets for aggregate bus_subsidy_spending, keyed by year (GBP).
BUS_SUBSIDY_TARGETS = {
    # DfT Annual Bus Statistics, year ending March 2025 (England), table
    # BUS05bii: total net government support for local bus services was
    # GBP 3.0bn (of which GBP 0.8bn concessionary travel reimbursement),
    # uplifted England → UK by population (≈ GBP 3.5bn UK).
    # https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
    2025: 3.0e9 * ENGLAND_TO_UK_POPULATION_UPLIFT,
}


def calibrate_bus_subsidy_spending(
    dataset: UKSingleYearDataset,
    time_period: int,
) -> float | None:
    """Scale ``bus_subsidy_spending`` to the DfT net-support total.

    Looks up the target for ``time_period`` in ``BUS_SUBSIDY_TARGETS``,
    computes the current household-level aggregate with a ``Microsimulation``
    built on ``dataset``, and multiplies
    ``dataset.household["bus_subsidy_spending"]`` in place by
    ``target / aggregate``.

    Args:
        dataset: Dataset whose ``household`` table is rescaled in place.
        time_period: Year used both as the key into ``BUS_SUBSIDY_TARGETS``
            and as the calculation period.

    Returns:
        The scale factor that was applied, or ``None`` when there is no
        target for ``time_period`` (the dataset is then left untouched).

    Raises:
        ValueError: If the simulated aggregate is not a positive, finite
            number, because no meaningful scale factor exists in that case.
    """
    target = BUS_SUBSIDY_TARGETS.get(time_period)
    if target is None:
        return None

    original_time_period = dataset.time_period
    # The simulation is built while the dataset carries a string time period
    # (presumably what Microsimulation expects — confirm); the original value
    # is restored even if the simulation raises.
    dataset.time_period = str(original_time_period)
    try:
        simulation = Microsimulation(dataset=dataset)
        actual = simulation.calculate(
            "bus_subsidy_spending",
            period=time_period,
            map_to="household",
        ).sum()
    finally:
        dataset.time_period = original_time_period

    # A plain `actual <= 0` check lets NaN through (every comparison with NaN
    # is False), which would overwrite the whole column with NaN; an infinite
    # aggregate would likewise zero it. Require a positive, finite aggregate.
    if not 0 < actual < float("inf"):
        raise ValueError(
            f"Cannot calibrate bus_subsidy_spending: aggregate is {actual}."
        )

    scale = target / actual
    dataset.household["bus_subsidy_spending"] *= scale
    return scale
"""Check that bus fare and subsidy totals in the built dataset track DfT figures.

The tests read the enhanced FRS dataset, which ``make data`` produces (the
build / push CI / local generation) and ``make download`` does *not* fetch.
The ``baseline`` fixture therefore skips them in PR CI, where no dataset has
been built, and runs them after a build against the freshly calibrated data —
the same arrangement as test_energy_calibration. Both bus variables are
calibrated to the official DfT totals during the build, so the aggregates
should land close to the targets; a 20% band is tolerated.
"""

import pytest

# DfT Annual Bus Statistics, year ending March 2025 (England), uplifted
# England -> UK by ONS mid-2023 population (x 68.3 / 57.7):
#   bus_fare_spending    -> BUS05aii passenger fare receipts £3.4bn (~£4.0bn UK)
#   bus_subsidy_spending -> BUS05bii net government support £3.0bn (~£3.5bn UK)
# https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
BUS_TARGETS = {
    "bus_fare_spending": 3.4e9 * 68.3 / 57.7,
    "bus_subsidy_spending": 3.0e9 * 68.3 / 57.7,
}


@pytest.mark.parametrize(("variable", "target"), sorted(BUS_TARGETS.items()))
def test_bus_total_matches_dft_target(baseline, variable: str, target: float):
    """The 2025 household aggregate of ``variable`` is within 20% of ``target``."""
    household_values = baseline.calculate(variable, map_to="household", period=2025)
    total = household_values.sum()
    relative_error = abs(total / target - 1)
    assert relative_error < 0.2, (
        f"{variable}: £{total / 1e9:.2f}bn vs DfT target £{target / 1e9:.2f}bn "
        f"(relative error {relative_error:.1%})."
    )