Skip to content
This repository was archived by the owner on Jun 19, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/431.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- Calibrate `bus_fare_spending` and `bus_subsidy_spending` to the DfT Annual Bus Statistics totals (year ending March 2025, England): passenger fare receipts of £3.4bn (table BUS05aii) and net government support of £3.0bn (table BUS05bii), both uplifted England→UK by population. The calibration is applied as post-calibration scaling steps that mirror the rail subsidy calibration. Without anchoring, the imputed bus fare total inherited the broader transport-consumption over-estimate (~£10bn, ~3× too high) and the bus subsidy total drifted low (~£1.5bn). Also adds tests asserting that both bus totals match the DfT targets to within 20% in the built dataset; these are skipped in PR CI (where no dataset is built) and run on the post-merge build, like `test_energy_calibration`.
10 changes: 10 additions & 0 deletions policyengine_uk_data/datasets/create_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,20 +292,30 @@ def main():
update_dataset("Calibrate public service aggregates", "processing")
from policyengine_uk_data.datasets.imputations.services.services import (
calibrate_rail_subsidy_spending,
calibrate_bus_subsidy_spending,
)

calibrate_rail_subsidy_spending(
frs_calibrated,
frs_release.calibration_year,
)
calibrate_bus_subsidy_spending(
frs_calibrated,
frs_release.calibration_year,
)
update_dataset("Calibrate public service aggregates", "completed")

update_dataset("Calibrate fuel litres", "processing")
from policyengine_uk_data.datasets.imputations.consumption import (
calibrate_dataset_fuel_litre_proxies_to_road_fuel,
calibrate_bus_fare_spending,
)

calibrate_dataset_fuel_litre_proxies_to_road_fuel(frs_calibrated)
calibrate_bus_fare_spending(
frs_calibrated,
frs_release.calibration_year,
)
update_dataset("Calibrate fuel litres", "completed")

update_dataset("Save final dataset", "processing")
Expand Down
45 changes: 45 additions & 0 deletions policyengine_uk_data/datasets/imputations/consumption.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,51 @@ def calibrate_dataset_fuel_litre_proxies_to_road_fuel(
)


# England → UK uplift applied to England-only DfT bus figures. DfT publishes no
# single GB/UK bus-finance total, so the ONS mid-2023 population ratio
# (UK 68.3M / England 57.7M ≈ 1.18) is used as a best approximation. The factor
# is indicative only — bus use per head varies by nation.
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18

# England-only passenger fare receipts on local bus services, keyed by
# calibration year. Source: DfT Annual Bus Statistics, year ending March 2025
# (England), table BUS05aii — GBP 3.4bn (52% of GBP 6.6bn total operating
# revenue).
# https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
_ENGLAND_BUS_FARE_RECEIPTS = {
    2025: 3.4e9,
}

# UK calibration targets for bus_fare_spending: the England receipts uplifted
# by population (≈ GBP 4.0bn for 2025). Without anchoring, the imputed
# aggregate inherits the broader transport-consumption over-estimate
# (~GBP 10bn).
BUS_FARE_TARGETS = {
    year: receipts * ENGLAND_TO_UK_POPULATION_UPLIFT
    for year, receipts in _ENGLAND_BUS_FARE_RECEIPTS.items()
}


def calibrate_bus_fare_spending(
    dataset: UKSingleYearDataset,
    time_period: int,
) -> float | None:
    """Scale bus_fare_spending to the DfT passenger-fare total (BUS_FARE_TARGETS).

    The household-level ``bus_fare_spending`` column is multiplied in place by
    ``target / actual``, where ``actual`` is the household-mapped aggregate the
    simulation returns for ``time_period``.

    Args:
        dataset: Dataset whose ``household`` table is rescaled in place.
        time_period: Calibration year; used both to look up the target and as
            the calculation period.

    Returns:
        The scale factor that was applied, or ``None`` when BUS_FARE_TARGETS
        has no entry for ``time_period`` (the dataset is then left untouched).

    Raises:
        ValueError: If the simulated aggregate is not a finite, strictly
            positive number, since no meaningful scale factor exists.
    """
    target = BUS_FARE_TARGETS.get(time_period)
    if target is None:
        return None

    # The dataset's time_period is exposed as a string while the simulation is
    # built and evaluated, then always restored — even if the simulation fails.
    # NOTE(review): presumably Microsimulation expects a string period; confirm.
    original_time_period = dataset.time_period
    dataset.time_period = str(original_time_period)
    try:
        simulation = Microsimulation(dataset=dataset)
        actual = simulation.calculate(
            "bus_fare_spending",
            period=time_period,
            map_to="household",
        ).sum()
    finally:
        dataset.time_period = original_time_period

    # Reject zero, negative, NaN and infinite aggregates alike. NaN fails every
    # comparison, so a plain `actual <= 0` check would let it through and the
    # resulting NaN scale would silently overwrite the whole column.
    if not 0 < actual < float("inf"):
        raise ValueError(
            f"Cannot calibrate bus_fare_spending: aggregate is {actual}."
        )

    scale = target / actual
    dataset.household["bus_fare_spending"] *= scale
    return scale


def save_imputation_models():
from policyengine_uk_data.utils.qrf import QRF

Expand Down
48 changes: 48 additions & 0 deletions policyengine_uk_data/datasets/imputations/services/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,24 @@
2025: 21.6e9,
}

# England → UK uplift applied to England-only DfT bus figures. DfT publishes no
# single GB/UK bus-finance total, so figures are scaled by the ONS mid-2023
# population ratio (UK 68.3M / England 57.7M ≈ 1.18) as a best approximation.
# This is indicative: bus use per head varies by nation (London lifts England's
# per-capita use), so the true UK factor is likely a little below the
# population ratio.
# ONS mid-year population estimates:
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7  # ≈ 1.18

# England-only total net government support for local bus services, keyed by
# calibration year. Source: DfT Annual Bus Statistics, year ending March 2025
# (England), table BUS05bii — GBP 3.0bn (of which GBP 0.8bn concessionary
# travel reimbursement).
# https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
_ENGLAND_BUS_NET_SUPPORT = {
    2025: 3.0e9,
}

# UK calibration targets for bus_subsidy_spending: the England support figure
# uplifted by population (≈ GBP 3.5bn for 2025).
BUS_SUBSIDY_TARGETS = {
    year: support * ENGLAND_TO_UK_POPULATION_UPLIFT
    for year, support in _ENGLAND_BUS_NET_SUPPORT.items()
}


def get_fare_index_survey_year() -> float:
"""
Expand Down Expand Up @@ -66,6 +84,36 @@ def calibrate_rail_subsidy_spending(
return scale


def calibrate_bus_subsidy_spending(
    dataset: UKSingleYearDataset,
    time_period: int,
) -> float | None:
    """Scale bus_subsidy_spending to the DfT net-support total (BUS_SUBSIDY_TARGETS).

    The household-level ``bus_subsidy_spending`` column is multiplied in place
    by ``target / actual``, where ``actual`` is the household-mapped aggregate
    the simulation returns for ``time_period``.

    Args:
        dataset: Dataset whose ``household`` table is rescaled in place.
        time_period: Calibration year; used both to look up the target and as
            the calculation period.

    Returns:
        The scale factor that was applied, or ``None`` when
        BUS_SUBSIDY_TARGETS has no entry for ``time_period`` (the dataset is
        then left untouched).

    Raises:
        ValueError: If the simulated aggregate is not a finite, strictly
            positive number, since no meaningful scale factor exists.
    """
    target = BUS_SUBSIDY_TARGETS.get(time_period)
    if target is None:
        return None

    # The dataset's time_period is exposed as a string while the simulation is
    # built and evaluated, then always restored — even if the simulation fails.
    # NOTE(review): presumably Microsimulation expects a string period; confirm.
    original_time_period = dataset.time_period
    dataset.time_period = str(original_time_period)
    try:
        simulation = Microsimulation(dataset=dataset)
        actual = simulation.calculate(
            "bus_subsidy_spending",
            period=time_period,
            map_to="household",
        ).sum()
    finally:
        dataset.time_period = original_time_period

    # Reject zero, negative, NaN and infinite aggregates alike. NaN fails every
    # comparison, so a plain `actual <= 0` check would let it through and the
    # resulting NaN scale would silently overwrite the whole column.
    if not 0 < actual < float("inf"):
        raise ValueError(
            f"Cannot calibrate bus_subsidy_spending: aggregate is {actual}."
        )

    scale = target / actual
    dataset.household["bus_subsidy_spending"] *= scale
    return scale


def impute_services(
dataset: UKSingleYearDataset,
) -> UKSingleYearDataset:
Expand Down
18 changes: 9 additions & 9 deletions policyengine_uk_data/tests/test_aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
# ORR/GOV.UK rail finance statistics report GBP 21.6bn of government
# support to the rail industry in 2024-25.
"rail_subsidy_spending": 21.6e9,
# Approximate public support for local bus services; kept as a loose
# smoke-test target because source coverage and dataset coverage differ.
"bus_subsidy_spending": 2.5e9,
# DfT Annual Bus Statistics (year ending March 2025) report GBP 3.4bn
# passenger fare receipts for local bus services in England. The LCFS input
# is UK household bus/coach fare spending, so this is an order-of-magnitude
# target. Enable once a dataset built with the bus_fare_spending imputation
# is published — the column is absent from the currently-released dataset.
# "bus_fare_spending": 3.4e9,
# DfT Annual Bus Statistics (year ending March 2025, England), table
# BUS05bii: net government support ~GBP 3.0bn, uplifted England→UK by
# population (~GBP 3.5bn). bus_subsidy_spending is calibrated to this.
"bus_subsidy_spending": 3.0e9 * 68.3 / 57.7,
# DfT Annual Bus Statistics (year ending March 2025, England), table
# BUS05aii: passenger fare receipts ~GBP 3.4bn, uplifted England→UK by
# population (~GBP 4.0bn). bus_fare_spending is calibrated to this. Enable
# once a dataset built with that calibration is published (the currently
# released dataset predates it).
# "bus_fare_spending": 3.4e9 * 68.3 / 57.7,
}


Expand Down
30 changes: 30 additions & 0 deletions policyengine_uk_data/tests/test_bus_fare_spending_in_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Bus fare / subsidy totals in the built dataset must match the DfT targets.

These use the enhanced FRS dataset, which is produced by ``make data`` (the
build / push CI / local generation) and is *not* fetched by ``make download``.
So the `baseline` fixture skips them in PR CI (no built dataset) and runs them
after a build, against the freshly calibrated data — the same pattern as
test_energy_calibration. Both bus variables are calibrated to the official DfT
totals in the build, so the totals should match closely; a 20% band is allowed.
"""

import pytest

# Expected UK totals for the two calibrated bus variables, keyed by variable
# name. Source: DfT Annual Bus Statistics, year ending March 2025 (England),
# uplifted England -> UK by ONS mid-2023 population (x 68.3 / 57.7):
# bus_fare_spending -> BUS05aii passenger fare receipts £3.4bn (~£4.0bn UK)
# bus_subsidy_spending -> BUS05bii net government support £3.0bn (~£3.5bn UK)
# https://www.gov.uk/government/statistics/annual-bus-statistics-year-ending-march-2025/annual-bus-statistics-year-ending-march-2025
# NOTE(review): the figures are written out literally here rather than imported
# from BUS_FARE_TARGETS / BUS_SUBSIDY_TARGETS in the imputation modules, so
# they must be kept in sync by hand if the calibration targets change.
BUS_TARGETS = {
"bus_fare_spending": 3.4e9 * 68.3 / 57.7,
"bus_subsidy_spending": 3.0e9 * 68.3 / 57.7,
}


@pytest.mark.parametrize("variable,target", sorted(BUS_TARGETS.items()))
def test_bus_total_matches_dft_target(baseline, variable: str, target: float):
    """Check the 2025 household total of ``variable`` is within 20% of ``target``."""
    household_values = baseline.calculate(
        variable,
        map_to="household",
        period=2025,
    )
    total = household_values.sum()
    # Relative deviation from the DfT target; computed once and reused in the
    # failure message.
    relative_error = abs(total / target - 1)
    assert relative_error < 0.2, (
        f"{variable}: £{total / 1e9:.2f}bn vs DfT target £{target / 1e9:.2f}bn "
        f"(relative error {relative_error:.1%})."
    )