PolicyEngine · vahid-ahmadi · Jun 18, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 18, 2026
diff --git a/changelog.d/433.md b/changelog.d/433.md
@@ -0,0 +1,2 @@
+- Populate `employment_sector` (public/private, from FRS `mjobsect`) and `sic_industry_division` (SIC 2007, from FRS `sic`) Person-level variables in the FRS dataset.
+- Add a national calibration target for public-sector employment (`employment_sector == PUBLIC`) against the ONS Public Sector Employment headcount.
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
@@ -749,6 +749,22 @@ def determine_education_level(fted_val, typeed2_val, age_val):
         person.empstati, 1, range(12), EMPLOYMENTS
     ).fillna("LONG_TERM_DISABLED")
 
+    # Add employer sector of the main job from FRS `mjobsect`
+    # (1 = private, 2 = public; missing/blank = not in paid work).
+    EMPLOYMENT_SECTORS = ["NOT_EMPLOYED", "PRIVATE", "PUBLIC"]
+    pe_person["employment_sector"] = categorical(
+        pd.to_numeric(person.mjobsect, errors="coerce"),
+        0,
+        [0, 1, 2],
+        EMPLOYMENT_SECTORS,
+    ).fillna("NOT_EMPLOYED")
+
+    # Standard Industrial Classification (2007) division of the main job from
+    # FRS `sic` (0 if unknown; 84 = public administration and defence).
+    pe_person["sic_industry_division"] = (
+        pd.to_numeric(person.sic, errors="coerce").fillna(0).clip(lower=0).astype(int)
+    )
+
     REGIONS = [
         "NORTH_EAST",
         "NORTH_WEST",

diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py
@@ -35,6 +35,7 @@
     compute_person_support,
     compute_obr_council_tax,
     compute_pip_claimants,
+    compute_public_sector_employment,
     compute_regional_age,
     compute_savings_interest,
     compute_scotland_demographics,
@@ -276,6 +277,10 @@ def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray |
     if target.variable == "tenure_type" and target.is_count:
         return compute_tenure(target, ctx)
 
+    # Public sector employment (ONS PSE)
+    if target.variable == "employment_sector" and target.is_count:
+        return compute_public_sector_employment(target, ctx)
+
     # Income bands (HMRC SPI)
     if target.breakdown_variable == "total_income":
         return compute_income_band(target, ctx)

diff --git a/policyengine_uk_data/targets/compute/__init__.py b/policyengine_uk_data/targets/compute/__init__.py
@@ -37,6 +37,7 @@
     compute_housing,
     compute_land_value,
     compute_person_support,
+    compute_public_sector_employment,
     compute_regional_land_value,
     compute_savings_interest,
     compute_scottish_child_payment,
@@ -59,6 +60,7 @@
     "compute_obr_council_tax",
     "compute_person_support",
     "compute_pip_claimants",
+    "compute_public_sector_employment",
     "compute_regional_age",
     "compute_savings_interest",
     "compute_scotland_demographics",

diff --git a/policyengine_uk_data/targets/compute/other.py b/policyengine_uk_data/targets/compute/other.py
@@ -28,6 +28,13 @@ def compute_vehicles(target, ctx) -> np.ndarray:
     return (ctx.pe("num_vehicles") >= 2).astype(float)
 
 
+def compute_public_sector_employment(target, ctx) -> np.ndarray:
+    """Return each household's number of people flagged as PUBLIC sector."""
+    public_flag = ctx.pe_person("employment_sector") == "PUBLIC"
+    per_person = public_flag.astype(float)
+    return ctx.household_from_person(per_person)
+
+
 def compute_housing(target, ctx) -> np.ndarray:
     """Compute housing targets (mortgage, private rent, social rent)."""
     name = target.name

diff --git a/policyengine_uk_data/targets/sources/ons_public_sector_employment.py b/policyengine_uk_data/targets/sources/ons_public_sector_employment.py
@@ -0,0 +1,51 @@
+"""ONS Public Sector Employment (PSE) target.
+
+The FRS self-reported employer sector (`mjobsect` -> `employment_sector`)
+over-counts public-sector employment relative to the official ONS PSE
+headcount, so this adds a national calibration target for the number of
+people whose main job is in the public sector
+(`employment_sector == PUBLIC`).
+
+PSE measures the institutional public sector (central government, local
+government and public corporations) - i.e. NHS, state schools, councils,
+civil service and the armed forces - so it is the right official total for
+the whole-public-sector `employment_sector` flag, not the much narrower
+SIC division 84 ("public administration and defence").
+
+Source: ONS Public Sector Employment, UK (headcount, not seasonally
+adjusted). Headline UK totals: ~5.90m (2023), ~5.94m (2024).
+"""
+
+from policyengine_uk_data.targets.schema import (
+    GeographicLevel,
+    Target,
+    Unit,
+)
+
+_REF = (  # ONS PSE bulletin URL, attached to the target as ``reference_url``
+    "https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/"
+    "publicsectorpersonnel/bulletins/publicsectoremployment/latest"
+)
+
+# Approximate ONS PSE UK headline headcount (people), by calendar year.
+_VALUES = {
+    2023: 5_900_000.0,
+    2024: 5_940_000.0,
+}
+
+
+def get_targets() -> list[Target]:
+    """Return the single national ONS public sector employment target."""
+    pse_target = Target(
+        name="ons/public_sector_employment",
+        variable="employment_sector",
+        source="ons",
+        unit=Unit.COUNT,
+        geographic_level=GeographicLevel.NATIONAL,
+        geo_code="K02000001",
+        geo_name="United Kingdom",
+        values=dict(_VALUES),
+        is_count=True,
+        reference_url=_REF,
+    )
+    return [pse_target]
diff --git a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py
@@ -427,6 +427,8 @@ def fake_read_csv(path, *args, **kwargs):
                 "eduma": 0,
                 "edumaamt": 0,
                 "empstati": 8,
+                "mjobsect": 0,
+                "sic": 0,
                 "fsbval": 0,
                 "fsfvval": 0,
                 "fsmval": 0,

diff --git a/policyengine_uk_data/tests/test_public_sector_employment_target.py b/policyengine_uk_data/tests/test_public_sector_employment_target.py
@@ -0,0 +1,111 @@
+"""Tests for the ONS Public Sector Employment calibration target.
+
+The target constrains the simulated count of public-sector workers
+(`employment_sector == PUBLIC`) towards the official ONS Public Sector
+Employment (PSE) headcount. The hardcoded target values must lie within
+20% of the ONS figure; the simulated total gets a looser 50% tolerance
+because the FRS self-reported sector over-counts public employment.
+"""
+
+import pytest
+
+from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE
+from policyengine_uk_data.targets import get_all_targets
+from policyengine_uk_data.targets.build_loss_matrix import _resolve_value
+from policyengine_uk_data.targets.sources.ons_public_sector_employment import (
+    get_targets,
+)
+
+# Accepted error between the target *value* and the official ONS PSE figure
+# (a sanity check on the hardcoded target, not a calibration outcome).
+ACCEPTED_RELATIVE_ERROR = 0.20
+
+# Tolerance for the simulated weighted total after data generation. The FRS
+# self-reported sector over-counts public employment (~7.9m vs ONS ~5.9m) and
+# the national calibration only partially pulls it in, so a loose tolerance is
+# used, in line with the other aggregate-vs-target tests (land value ~0.65-0.70,
+# spending aggregates ~0.70, vehicle ownership ~0.30).
+SIMULATED_RELATIVE_TOLERANCE = 0.50
+
+# Official ONS Public Sector Employment, UK (headcount), by year. Held
+# independently of the source module so a wrong target value is caught.
+ONS_PSE_HEADCOUNT = {
+    2023: 5_900_000.0,
+    2024: 5_940_000.0,
+}
+
+# Years the enhanced FRS fixture can represent (mirrors land value tests).
+MODEL_CHECK_YEARS = sorted(
+    {
+        CURRENT_FRS_RELEASE.base_year,
+        CURRENT_FRS_RELEASE.calibration_year,
+    }
+)
+
+
+# ── Target structure ─────────────────────────────────────────────────
+
+
+def test_get_targets_returns_one():
+    """The source module exposes exactly one target."""
+    assert len(get_targets()) == 1
+
+
+def test_target_variable_and_metadata():
+    """The ONS target is a count over the employment_sector variable."""
+    pse_target = get_targets()[0]
+    assert pse_target.name == "ons/public_sector_employment"
+    assert pse_target.variable == "employment_sector"
+    assert pse_target.source == "ons"
+    assert pse_target.is_count
+
+
+def test_targets_in_registry():
+    """The global target registry includes the public sector target."""
+    registered = [entry.name for entry in get_all_targets()]
+    assert "ons/public_sector_employment" in registered
+
+
+# ── Target values ────────────────────────────────────────────────────
+
+
+def test_target_values_within_20pct_of_ons():
+    """Each target value is within ACCEPTED_RELATIVE_ERROR of the ONS PSE figure."""
+    values = get_targets()[0].values
+    for year, official in ONS_PSE_HEADCOUNT.items():
+        assert year in values, f"missing target for {year}"
+        rel_error = abs(values[year] / official - 1)
+        assert rel_error <= ACCEPTED_RELATIVE_ERROR, (
+            f"{year} target {values[year]:,.0f} differs from ONS PSE "
+            f"{official:,.0f} by {rel_error:.1%} (>{ACCEPTED_RELATIVE_ERROR:.0%})."
+        )
+
+
+# ── Simulated total after data generation ────────────────────────────
+
+
+@pytest.mark.parametrize("year", MODEL_CHECK_YEARS, ids=map(str, MODEL_CHECK_YEARS))
+def test_public_sector_employment_total(enhanced_frs, baseline, year):
+    """Compare the weighted public-sector headcount with the ONS PSE target.
+
+    Evaluated on the generated enhanced FRS fixture; the relative error must
+    stay below ``SIMULATED_RELATIVE_TOLERANCE``. Skipped when the person
+    table has no ``employment_sector`` column (rebuild with ``make data``).
+    """
+    if "employment_sector" not in enhanced_frs.person.columns:
+        pytest.skip("dataset predates employment_sector; rebuild with `make data`")
+
+    target = _resolve_value(get_targets()[0], year)
+    assert target is not None, f"no target value resolvable for {year}"
+
+    hh_weights = baseline.calculate("household_weight", period=year).values
+    sector_values = baseline.calculate("employment_sector", period=year).values
+    public_flags = (sector_values == "PUBLIC").astype(float)
+    public_per_hh = baseline.map_result(public_flags, "person", "household")
+    estimate = (public_per_hh * hh_weights).sum()
+    rel_error = abs(estimate / target - 1)
+    assert rel_error < SIMULATED_RELATIVE_TOLERANCE, (
+        f"public sector employment ({year}): expected {target:,.0f}, "
+        f"got {estimate:,.0f} (relative error = {rel_error:.1%}, "
+        f"tolerance = {SIMULATED_RELATIVE_TOLERANCE:.0%})"
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "policyengine",
     "google-cloud-storage",
     "google-auth",
-    "policyengine-uk>=2.89.1",
+    "policyengine-uk>=2.89.2",
     "microcalibrate>=0.18.0",
     "microimpute>=1.0.1",
     "ruff>=0.9.0",

diff --git a/uv.lock b/uv.lock
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		- Populate `employment_sector` (public/private, from FRS `mjobsect`) and `sic_industry_division` (SIC 2007, from FRS `sic`) Person-level variables in the FRS dataset.
		- Add a national calibration target for public-sector employment (`employment_sector == PUBLIC`) against the ONS Public Sector Employment headcount.