Skip to content
This repository was archived by the owner on Jun 19, 2026. It is now read-only.
2 changes: 2 additions & 0 deletions changelog.d/433.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Populate `employment_sector` (public/private, from FRS `mjobsect`) and `sic_industry_division` (SIC 2007, from FRS `sic`) Person-level variables in the FRS dataset.
- Add a national calibration target for public-sector employment (`employment_sector == PUBLIC`) against the ONS Public Sector Employment headcount.
16 changes: 16 additions & 0 deletions policyengine_uk_data/datasets/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,22 @@ def determine_education_level(fted_val, typeed2_val, age_val):
person.empstati, 1, range(12), EMPLOYMENTS
).fillna("LONG_TERM_DISABLED")

# Add employer sector of the main job from FRS `mjobsect`
# (1 = private, 2 = public; missing/blank = not in paid work).
EMPLOYMENT_SECTORS = ["NOT_EMPLOYED", "PRIVATE", "PUBLIC"]
pe_person["employment_sector"] = categorical(
pd.to_numeric(person.mjobsect, errors="coerce"),
0,
[0, 1, 2],
EMPLOYMENT_SECTORS,
).fillna("NOT_EMPLOYED")

# Standard Industrial Classification (2007) division of the main job from
# FRS `sic` (0 if unknown; 84 = public administration and defence).
pe_person["sic_industry_division"] = (
pd.to_numeric(person.sic, errors="coerce").fillna(0).clip(lower=0).astype(int)
)

REGIONS = [
"NORTH_EAST",
"NORTH_WEST",
Expand Down
5 changes: 5 additions & 0 deletions policyengine_uk_data/targets/build_loss_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
compute_person_support,
compute_obr_council_tax,
compute_pip_claimants,
compute_public_sector_employment,
compute_regional_age,
compute_savings_interest,
compute_scotland_demographics,
Expand Down Expand Up @@ -276,6 +277,10 @@ def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray |
if target.variable == "tenure_type" and target.is_count:
return compute_tenure(target, ctx)

# Public sector employment (ONS PSE)
if target.variable == "employment_sector" and target.is_count:
return compute_public_sector_employment(target, ctx)

# Income bands (HMRC SPI)
if target.breakdown_variable == "total_income":
return compute_income_band(target, ctx)
Expand Down
2 changes: 2 additions & 0 deletions policyengine_uk_data/targets/compute/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
compute_housing,
compute_land_value,
compute_person_support,
compute_public_sector_employment,
compute_regional_land_value,
compute_savings_interest,
compute_scottish_child_payment,
Expand All @@ -59,6 +60,7 @@
"compute_obr_council_tax",
"compute_person_support",
"compute_pip_claimants",
"compute_public_sector_employment",
"compute_regional_age",
"compute_savings_interest",
"compute_scotland_demographics",
Expand Down
7 changes: 7 additions & 0 deletions policyengine_uk_data/targets/compute/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ def compute_vehicles(target, ctx) -> np.ndarray:
return (ctx.pe("num_vehicles") >= 2).astype(float)


def compute_public_sector_employment(target, ctx) -> np.ndarray:
    """Return each household's number of public-sector main-job holders.

    Reads the person-level ``employment_sector`` values from ``ctx``, marks
    entries equal to ``"PUBLIC"`` as 1.0 and everything else as 0.0, then
    passes those flags to ``ctx.household_from_person`` to obtain the
    household-level result. ``target`` is accepted for signature parity with
    the other compute functions and is not read here.
    """
    public_flags = (ctx.pe_person("employment_sector") == "PUBLIC").astype(float)
    return ctx.household_from_person(public_flags)


def compute_housing(target, ctx) -> np.ndarray:
"""Compute housing targets (mortgage, private rent, social rent)."""
name = target.name
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""ONS Public Sector Employment (PSE) target.

The FRS self-reported employer sector (`mjobsect` -> `employment_sector`)
over-counts public-sector employment relative to the official ONS PSE
headcount, so this adds a national calibration target for the number of
people whose main job is in the public sector
(`employment_sector == PUBLIC`).

PSE measures the institutional public sector (central government, local
government and public corporations) - i.e. NHS, state schools, councils,
civil service and the armed forces - so it is the right official total for
the whole-public-sector `employment_sector` flag, not the much narrower
SIC division 84 ("public administration and defence").

Source: ONS Public Sector Employment, UK (headcount, not seasonally
adjusted). Headline UK totals: ~5.90m (2023), ~5.94m (2024).
"""

from policyengine_uk_data.targets.schema import (
GeographicLevel,
Target,
Unit,
)

# ONS Public Sector Employment bulletin ("latest" URL), used as the target's
# reference link.
_REF = (
    "https://www.ons.gov.uk"
    "/employmentandlabourmarket/peopleinwork/publicsectorpersonnel"
    "/bulletins/publicsectoremployment/latest"
)

# ONS PSE UK total headcount (people), keyed by calendar year.
_VALUES = {
    2023: 5.90e6,
    2024: 5.94e6,
}


def get_targets() -> list[Target]:
    """Build the national ONS public sector employment target.

    Returns:
        A one-element list holding the UK-wide count target on
        ``employment_sector``. The target carries a copy of the per-year
        headcounts in ``_VALUES`` and the ONS bulletin URL in ``_REF``.
    """
    # Copy so the target does not share the module-level dict.
    headcount_by_year = dict(_VALUES)
    uk_target = Target(
        name="ons/public_sector_employment",
        variable="employment_sector",
        source="ons",
        unit=Unit.COUNT,
        geographic_level=GeographicLevel.NATIONAL,
        geo_code="K02000001",
        geo_name="United Kingdom",
        values=headcount_by_year,
        is_count=True,
        reference_url=_REF,
    )
    return [uk_target]
2 changes: 2 additions & 0 deletions policyengine_uk_data/tests/test_legacy_benefit_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,8 @@ def fake_read_csv(path, *args, **kwargs):
"eduma": 0,
"edumaamt": 0,
"empstati": 8,
"mjobsect": 0,
"sic": 0,
"fsbval": 0,
"fsfvval": 0,
"fsmval": 0,
Expand Down
111 changes: 111 additions & 0 deletions policyengine_uk_data/tests/test_public_sector_employment_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Tests for the ONS Public Sector Employment calibration target.

The target constrains the simulated count of public-sector workers
(`employment_sector == PUBLIC`) towards the official ONS Public Sector
Employment (PSE) headcount. Two tolerances are checked: the hardcoded
target values must be within 20% of the official ONS figures, and the
simulated weighted total must be within 50% of the target, because the
FRS self-reported sector over-counts public employment and national
calibration only partially corrects it.
"""

import pytest

from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE
from policyengine_uk_data.targets import get_all_targets
from policyengine_uk_data.targets.build_loss_matrix import _resolve_value
from policyengine_uk_data.targets.sources.ons_public_sector_employment import (
get_targets,
)

# Largest relative gap allowed between each hardcoded target *value* and the
# official ONS PSE figure. This is a sanity check on the target itself, not a
# calibration outcome.
ACCEPTED_RELATIVE_ERROR = 0.2

# Largest relative gap allowed between the simulated weighted total (after
# data generation) and the target. The FRS self-reported sector over-counts
# public employment (~7.9m vs ONS ~5.9m) and the national calibration only
# partially pulls it in, so the tolerance is loose, in line with the other
# aggregate-vs-target tests (land value ~0.65-0.70, spending aggregates
# ~0.70, vehicle ownership ~0.30).
SIMULATED_RELATIVE_TOLERANCE = 0.5

# Official ONS Public Sector Employment, UK (headcount), keyed by year.
# Deliberately duplicated here rather than imported from the source module,
# so that a wrong target value there is caught.
ONS_PSE_HEADCOUNT = {
    2023: 5.90e6,
    2024: 5.94e6,
}

# Years used to parametrize the simulated-total test: the current FRS
# release's base year and calibration year, de-duplicated via a set (the two
# may coincide) and sorted ascending. These are the years the enhanced FRS
# fixture can represent (mirrors land value tests).
MODEL_CHECK_YEARS = sorted(
    {
        CURRENT_FRS_RELEASE.base_year,
        CURRENT_FRS_RELEASE.calibration_year,
    }
)


# ── Target structure ─────────────────────────────────────────────────


def test_get_targets_returns_one():
    """The source module exposes exactly one public sector target."""
    targets = get_targets()
    assert len(targets) == 1


def test_target_variable_and_metadata():
    """The first target is an ONS count on ``employment_sector``."""
    target = get_targets()[0]
    expected_fields = {
        "name": "ons/public_sector_employment",
        "variable": "employment_sector",
        "source": "ons",
    }
    for attribute, wanted in expected_fields.items():
        assert getattr(target, attribute) == wanted
    assert target.is_count


def test_targets_in_registry():
    """The global target registry includes the public sector target."""
    assert any(
        registered.name == "ons/public_sector_employment"
        for registered in get_all_targets()
    )


# ── Target values ────────────────────────────────────────────────────


def test_target_values_within_20pct_of_ons():
    """Each target value is within ``ACCEPTED_RELATIVE_ERROR`` of the ONS figure.

    For every year in ``ONS_PSE_HEADCOUNT`` the target must define a value,
    and that value's relative deviation from the official headcount must not
    exceed ``ACCEPTED_RELATIVE_ERROR``.
    """
    values = get_targets()[0].values
    for year, official in ONS_PSE_HEADCOUNT.items():
        assert year in values, f"missing target for {year}"
        rel_error = abs(values[year] / official - 1)
        # Render the threshold from the constant so the failure message cannot
        # drift from the tolerance that is actually enforced.
        assert rel_error <= ACCEPTED_RELATIVE_ERROR, (
            f"{year} target {values[year]:,.0f} differs from ONS PSE "
            f"{official:,.0f} by {rel_error:.1%} "
            f"(>{ACCEPTED_RELATIVE_ERROR:.0%})."
        )


# ── Simulated total after data generation ────────────────────────────


@pytest.mark.parametrize("year", MODEL_CHECK_YEARS, ids=map(str, MODEL_CHECK_YEARS))
def test_public_sector_employment_total(enhanced_frs, baseline, year):
    """Simulated public-sector headcount lands near the ONS PSE target.

    Evaluated on the generated enhanced FRS, whose national calibration
    includes the public sector employment target. The weighted number of
    people with ``employment_sector == "PUBLIC"`` must deviate from the
    resolved target for ``year`` by less than
    ``SIMULATED_RELATIVE_TOLERANCE``. The test is skipped when the dataset
    predates the variable (rebuild with ``make data``).
    """
    if "employment_sector" not in enhanced_frs.person.columns:
        pytest.skip("dataset predates employment_sector; rebuild with `make data`")

    expected = _resolve_value(get_targets()[0], year)
    assert expected is not None, f"no target value resolvable for {year}"

    household_weights = baseline.calculate("household_weight", period=year).values
    person_sector = baseline.calculate("employment_sector", period=year).values
    # Map the person-level 0/1 flags to households before applying weights.
    public_per_household = baseline.map_result(
        (person_sector == "PUBLIC").astype(float), "person", "household"
    )
    simulated = (public_per_household * household_weights).sum()

    deviation = abs(simulated / expected - 1)
    assert deviation < SIMULATED_RELATIVE_TOLERANCE, (
        f"public sector employment ({year}): expected {expected:,.0f}, "
        f"got {simulated:,.0f} (relative error = {deviation:.1%}, "
        f"tolerance = {SIMULATED_RELATIVE_TOLERANCE:.0%})"
    )
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"policyengine",
"google-cloud-storage",
"google-auth",
"policyengine-uk>=2.89.1",
"policyengine-uk>=2.89.2",
"microcalibrate>=0.18.0",
"microimpute>=1.0.1",
"ruff>=0.9.0",
Expand Down
10 changes: 5 additions & 5 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.