From 7d1f3b4c49df6d7d940e31645fcf8a8827858fdf Mon Sep 17 00:00:00 2001
From: Joseph <162703152+josephnef@users.noreply.github.com>
Date: Sun, 7 Jun 2026 16:31:01 +0300
Subject: [PATCH 1/2] Surface phy-level soft metrics on stream lines +
 BER-vs-SNR analyser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up A from #83. Adds per-path RSSI / EVM / SNR to every
<devourer-stream> line so corruption_analysis.py can correlate BER
with link quality on a per-frame basis instead of relying on
aggregated statistics.

* demo/main.cpp: <devourer-stream>rate=R len=L crc_err=X icv_err=Y
  rssi=A,B evm=A,B snr=A,B body=HEX. Same source as the Tier-2
  diagnostics in <devourer-body>; no new RX-status fields, just
  surfacing what FrameParser already populates.
* tools/precoder/corruption_analysis.py: parses the new fields,
  reports
    - SNR distribution (min/p25/med/p75/max) for chip-clean vs
      chip-corrupt populations
    - BER per 5-dB SNR bucket
  Uses max(snr_A, snr_B) as the "effective" SNR — on single-antenna
  1T1R sticks path B reads 0 (no signal, not "0 dB"), so a naive min
  would always report 0 and the bucket view collapses; max picks
  the active path on 1T1R and the stronger path on 2T2R
  single-stream operation.
* stream_rx.py / tun_p2p.py / precoder_stream_roundtrip.py: regex
  updated to tolerate the new optional rssi/evm/snr fields (none
  read them yet — pass-through compatibility).

Verification

Hardware (500 frames at default TX power, RTL8812AU → T2U Plus
RTL8821AU, ch 6):

    phy SNR (stronger path, dB):
      chip-clean    : n=467 min=0 p25=30 med=33 p75=38 max=51
      chip-corrupt  : n=0
    BER by SNR bucket (stronger path, 5-dB buckets):
      bucket       frames   bits-cmp   bit-err    BER
           0-5 dB        1        192        0   0.000e+00
         20-25 dB       11       2112        0   0.000e+00
         25-30 dB       76      14592        0   0.000e+00
         30-35 dB      178      34176        0   0.000e+00
         35-40 dB      122      23424        0   0.000e+00
         40-45 dB       55      10560        0   0.000e+00
         45-50 dB       19       3648        0   0.000e+00
         50-55 dB        5        960        0   0.000e+00

Bench link is too clean for chip-corrupt events even at the SNR tails,
which matches the post-PR-investigation finding for #83: at bench
distance the loss is at PHY sync, not FCS. The analyser is ready for
noisier deployments / range-extended captures (follow-up B).

Offline smoke (synthetic 5-clean@28dB + 5-corrupt@5dB injection)
correctly buckets BER=0 in the 25-30 dB bucket and BER=1.04e-2 in the
5-10 dB bucket — the per-bucket correlation works as designed.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 demo/main.cpp                         | 15 +++++-
 tests/precoder_stream_roundtrip.py    |  3 ++
 tools/precoder/corruption_analysis.py | 75 +++++++++++++++++++++++++++
 tools/precoder/stream_rx.py           |  3 ++
 tools/precoder/tun_p2p.py             |  3 ++
 5 files changed, 97 insertions(+), 2 deletions(-)
diff --git a/demo/main.cpp b/demo/main.cpp
index 8e5aaa0..be9a6e8 100644
--- a/demo/main.cpp
+++ b/demo/main.cpp
@@ -80,10 +80,21 @@ static void packetProcessor(const Packet &packet) {
           std::getenv("DEVOURER_RX_KEEP_CORRUPTED") != nullptr;
       const bool corrupted = packet.RxAtrib.crc_err || packet.RxAtrib.icv_err;
       if (stream_out && (!corrupted || keep_corrupted)) {
-        printf("<devourer-stream>rate=%u len=%zu crc_err=%u icv_err=%u body=",
+        /* Per-stream phy soft metrics (RSSI / EVM / SNR for paths A,B; on
+         * 8814AU paths C,D would also be non-zero but we surface only A,B
+         * here to stay aligned with <devourer-body>'s format). These are
+         * link-quality measurements at the PHY before decoding — same
+         * source as the Tier-2 diagnostics — so a consumer like
+         * corruption_analysis.py can correlate BER with link quality on a
+         * per-frame basis instead of relying on aggregated statistics. */
+        printf("<devourer-stream>rate=%u len=%zu crc_err=%u icv_err=%u "
+               "rssi=%d,%d evm=%d,%d snr=%d,%d body=",
                packet.RxAtrib.data_rate, packet.Data.size(),
                packet.RxAtrib.crc_err ? 1u : 0u,
-               packet.RxAtrib.icv_err ? 1u : 0u);
+               packet.RxAtrib.icv_err ? 1u : 0u,
+               packet.RxAtrib.rssi[0], packet.RxAtrib.rssi[1],
+               packet.RxAtrib.evm[0], packet.RxAtrib.evm[1],
+               packet.RxAtrib.snr[0], packet.RxAtrib.snr[1]);
         for (size_t i = 24; i < packet.Data.size(); ++i)
           printf("%02x", packet.Data[i]);
         printf("\n");
diff --git a/tests/precoder_stream_roundtrip.py b/tests/precoder_stream_roundtrip.py
index 278bc4f..4f45281 100644
--- a/tests/precoder_stream_roundtrip.py
+++ b/tests/precoder_stream_roundtrip.py
@@ -54,6 +54,9 @@
     r"<devourer-stream>rate=(?P<rate>\d+)\s+len=(?P<len>\d+)"
     r"(?:\s+crc_err=(?P<crc_err>\d+))?"
     r"(?:\s+icv_err=(?P<icv_err>\d+))?"
+    r"(?:\s+rssi=(?P<rssi>-?\d+,-?\d+))?"
+    r"(?:\s+evm=(?P<evm>-?\d+,-?\d+))?"
+    r"(?:\s+snr=(?P<snr>-?\d+,-?\d+))?"
     r"\s+body=(?P<hex>[0-9a-fA-F]*)"
 )
 
diff --git a/tools/precoder/corruption_analysis.py b/tools/precoder/corruption_analysis.py
index 2862de2..a8acbfc 100644
--- a/tools/precoder/corruption_analysis.py
+++ b/tools/precoder/corruption_analysis.py
@@ -62,10 +62,45 @@
     r"<devourer-stream>rate=(?P<rate>\d+)\s+len=(?P<len>\d+)"
     r"(?:\s+crc_err=(?P<crc_err>\d+))?"
     r"(?:\s+icv_err=(?P<icv_err>\d+))?"
+    r"(?:\s+rssi=(?P<rssi>-?\d+,-?\d+))?"
+    r"(?:\s+evm=(?P<evm>-?\d+,-?\d+))?"
+    r"(?:\s+snr=(?P<snr>-?\d+,-?\d+))?"
     r"\s+body=(?P<hex>[0-9a-fA-F]*)"
 )
 
 
+def _parse_pair(s: Optional[str]) -> Optional[tuple[int, int]]:
+    if not s:  # field missing from the stream line, or empty
+        return None
+    path_a, path_b = map(int, s.split(","))
+    return path_a, path_b
+
+
+def _effective_snr(snr: Optional[tuple[int, int]]) -> Optional[int]:
+    """Collapse the per-path (A, B) SNR pair into one representative value.
+
+    Returns the larger of the two readings, or None when no pair was
+    parsed. The larger reading is used because, per the emitter's notes,
+    path B reads 0 on single-antenna (1T1R) sticks — meaning "no signal",
+    not "0 dB" — so min(A, B) would pin every frame at 0 and flatten the
+    BER-vs-SNR view. On 2T2R single-stream links the larger reading is
+    the stronger path. A true two-stream 2T2R analysis would need a
+    finer model; this helper targets the single-stream PoC.
+    """
+    if snr is not None:
+        return max(snr)
+    return None
+
+
+def _snr_bucket(snr: Optional[tuple[int, int]]) -> str:
+    """Map a per-path SNR pair to its 5-dB bucket label ('no-snr' if absent)."""
+    strongest = _effective_snr(snr)
+    if strongest is None:
+        return "no-snr"
+    lower = strongest - strongest % 5  # floor to a multiple of 5, negatives too
+    return f"{lower:>3d}-{lower + 5} dB"
+
+
 def _expected_bodies(source: bytes, mtu: int, body_bytes: int,
                      seq_start: int = 0) -> dict[int, bytes]:
     """Reproduce the TX side's encoded envelopes for `source`. Byte mode
@@ -116,6 +151,13 @@ def main(argv: Optional[list[str]] = None) -> int:
     byte_pos_examined = collections.Counter()
     per_frame_byte_errs: list[int] = []
     per_frame_bit_errs: list[int] = []
+    # Per-frame phy metrics (parsed but only used when present).
+    snr_clean: list[int] = []
+    snr_corrupt: list[int] = []
+    # snr_bucket label -> running totals; for the BER-vs-SNR table.
+    bucket_frames: collections.Counter = collections.Counter()
+    bucket_bit_errors: collections.Counter = collections.Counter()
+    bucket_bits_compared: collections.Counter = collections.Counter()
 
     for line in sys.stdin:
         m = _STREAM_RE.search(line)
@@ -124,6 +166,10 @@ def main(argv: Optional[list[str]] = None) -> int:
         total_captured += 1
         crc_err = int(m.group("crc_err") or 0)
         icv_err = int(m.group("icv_err") or 0)
+        snr = _parse_pair(m.group("snr"))
+        eff = _effective_snr(snr)
+        if eff is not None:
+            (snr_corrupt if crc_err or icv_err else snr_clean).append(eff)
         if crc_err or icv_err:
             total_corrupted += 1
         else:
@@ -156,6 +202,10 @@ def main(argv: Optional[list[str]] = None) -> int:
         bit_errors += frame_bit_errs
         per_frame_byte_errs.append(frame_byte_errs)
         per_frame_bit_errs.append(frame_bit_errs)
+        bucket = _snr_bucket(snr)
+        bucket_frames[bucket] += 1
+        bucket_bit_errors[bucket] += frame_bit_errs
+        bucket_bits_compared[bucket] += compare_len * 8
 
     if not matched_seq:
         sys.stderr.write(
@@ -197,6 +247,31 @@ def main(argv: Optional[list[str]] = None) -> int:
             pct = 100.0 * count / max(1, exam)
             print(f"  {pos:3d}   {count:5d}/{exam:5d}   {pct:5.1f}%")
 
+    # Phy-metrics correlation. Two views: distribution of stronger-path SNR
+    # for chip-clean vs chip-corrupt frames, and per-SNR-bucket BER.
+    if snr_clean or snr_corrupt:
+        def _stat(xs: list[int]) -> str:
+            if not xs:
+                return "n=0"
+            xs = sorted(xs)
+            n = len(xs)
+            return (f"n={n} min={xs[0]} p25={xs[n // 4]} "
+                    f"med={xs[n // 2]} p75={xs[(3 * n) // 4]} max={xs[-1]}")
+        print(f"\nphy SNR (stronger path, dB):")
+        print(f"  chip-clean    : {_stat(snr_clean)}")
+        print(f"  chip-corrupt  : {_stat(snr_corrupt)}")
+
+    if bucket_frames and any(b != "no-snr" for b in bucket_frames):
+        print(f"\nBER by SNR bucket (stronger path, 5-dB buckets):")
+        print(f"  bucket       frames   bits-cmp   bit-err    BER")
+        for bucket in sorted(bucket_frames):
+            n = bucket_frames[bucket]
+            bits = bucket_bits_compared[bucket]
+            errs = bucket_bit_errors[bucket]
+            ber = errs / max(1, bits)
+            print(f"  {bucket:>11s}   {n:6d}   {bits:8d}   {errs:6d}   "
+                  f"{ber:.3e}")
+
     return 0
 
 
diff --git a/tools/precoder/stream_rx.py b/tools/precoder/stream_rx.py
index f30ba79..f5352c3 100644
--- a/tools/precoder/stream_rx.py
+++ b/tools/precoder/stream_rx.py
@@ -46,6 +46,9 @@
     r"<devourer-stream>rate=(?P<rate>\d+)\s+len=(?P<len>\d+)"
     r"(?:\s+crc_err=(?P<crc_err>\d+))?"
     r"(?:\s+icv_err=(?P<icv_err>\d+))?"
+    r"(?:\s+rssi=(?P<rssi>-?\d+,-?\d+))?"
+    r"(?:\s+evm=(?P<evm>-?\d+,-?\d+))?"
+    r"(?:\s+snr=(?P<snr>-?\d+,-?\d+))?"
     r"\s+body=(?P<hex>[0-9a-fA-F]*)"
 )
 
diff --git a/tools/precoder/tun_p2p.py b/tools/precoder/tun_p2p.py
index 2d895da..ad3e76f 100644
--- a/tools/precoder/tun_p2p.py
+++ b/tools/precoder/tun_p2p.py
@@ -88,6 +88,9 @@
     r"<devourer-stream>rate=(?P<rate>\d+)\s+len=(?P<len>\d+)"
     r"(?:\s+crc_err=(?P<crc_err>\d+))?"
     r"(?:\s+icv_err=(?P<icv_err>\d+))?"
+    r"(?:\s+rssi=(?P<rssi>-?\d+,-?\d+))?"
+    r"(?:\s+evm=(?P<evm>-?\d+,-?\d+))?"
+    r"(?:\s+snr=(?P<snr>-?\d+,-?\d+))?"
     r"\s+body=(?P<hex>[0-9a-fA-F]*)"
 )
 

From 62381960713073a75637cf6791050077eac450d3 Mon Sep 17 00:00:00 2001
From: Joseph <162703152+josephnef@users.noreply.github.com>
Date: Sun, 7 Jun 2026 16:39:27 +0300
Subject: [PATCH 2/2] Corruption-pattern survey for FEC design
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up B from #83 (depends on #84's phy soft metrics): adds a
DEVOURER_RX_DUMP_ALL env var to the RX demo that makes it emit a
<devourer-corrupt-any> line for every RX frame, plus an aggregate
analyser that turns those lines into FEC-design-grade statistics.

* demo/main.cpp: DEVOURER_RX_DUMP_ALL=1 emits one body-less line per
  frame with len + chip-flag bits + rate + per-path rssi/evm/snr.
  Body bytes are deliberately omitted (a hot survey would inflate
  the log past usable size); pkt_len + flags + phy is what the
  aggregate report needs.

* tools/precoder/corruption_survey.py: parses the new lines and
  reports
    - headline chip-clean / chip-corrupt counts
    - corruption rate broken down by DESC_RATE (the CCK vs OFDM
      split — without this the headline number is dominated by
      always-clean CCK ACKs and beacons and underestimates what
      OFDM data faces)
    - frame-size distribution for chip-clean vs chip-corrupt
    - phy-metric stats (rssi/evm/snr) per population, filtered to
      frames where the chip actually populated phy stats (CCK and
      short mgmt frames report 0/0; we treat those as "no
      measurement" instead of "0 dB" so the bucket views don't
      collapse)
    - per-SNR-bucket corruption rate (where measurable)
    - temporal clustering (when running live for >1 s; skipped on
      file/pipe input where all lines arrive at once)
  When run live, output ends with a heuristic FEC recommendation based
  on median-vs-peak corruption rate.

Bench finding (60 s ch6 capture, busy office environment near
several APs):

  === corruption survey (2266 frames, file/pipe) ===
  chip-clean       :   1663 ( 73.4%)
  chip-corrupt     :    603 ( 26.6%)
  corruption rate  : 26.61%
  no-phy-measurement:  2103  (CCK/short frames, chip reports 0/0)

  Corruption rate by DESC_RATE:
     idx name            count      %    corrupt    rate
    0x00 1M CCK           2075  91.6%        412  19.9%
    0x02 5.5M CCK            2   0.1%          2 100.0%
    0x03 11M CCK             1   0.0%          1 100.0%
    0x04 6M OFDM            17   0.8%         17 100.0%
    0x05 9M OFDM            19   0.8%         19 100.0%
    0x06 12M OFDM           20   0.9%         20 100.0%
    0x07 18M OFDM           31   1.4%         31 100.0%
    0x08 24M OFDM           22   1.0%         22 100.0%
    0x09 36M OFDM           30   1.3%         30 100.0%
    0x0a 48M OFDM           31   1.4%         31 100.0%
    0x0b 54M OFDM           18   0.8%         18 100.0%

The FEC-design takeaway: 1M CCK is robust at ~20% loss because the
modulation is simple; every OFDM rate is 100% corrupt because we're
hearing distant APs at marginal SNR. The PoC's 6M OFDM stream link
works only because TX and RX are co-located — at any real range the
chip will surface FCS failures at high rate and the stream layer
needs inter-frame parity (Reed-Solomon / Raptor) to recover, not
just per-frame FEC. The tool gives FEC designers the concrete
inputs (rate distribution, snr distribution, time clustering) to
size the parity block and overhead.

Builds on #83 (chip-level filter open) and #84 (phy soft metrics).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 demo/main.cpp                       |  20 ++
 tools/precoder/corruption_survey.py | 287 ++++++++++++++++++++++++++++
 2 files changed, 307 insertions(+)
 create mode 100644 tools/precoder/corruption_survey.py

diff --git a/demo/main.cpp b/demo/main.cpp
index be9a6e8..1aa014f 100644
--- a/demo/main.cpp
+++ b/demo/main.cpp
@@ -29,6 +29,26 @@ static void packetProcessor(const Packet &packet) {
     printf("<devourer>RX pkt #%d (len=%zu)\n", g_rx_count, packet.Data.size());
     fflush(stdout);
   }
+  /* DEVOURER_RX_DUMP_ALL=1: emit a `<devourer-corrupt-any>` line for EVERY
+   * frame regardless of SA, with chip-flag bits and phy-soft metrics.
+   * Consumed by tools/precoder/corruption_survey.py for the FEC-design
+   * corruption-pattern survey. Pairs with DEVOURER_RX_KEEP_CORRUPTED to
+   * also pass through chip-FCS-error frames. The body is omitted from this
+   * line by design (a hot survey would inflate the log past usable size);
+   * pkt_len + the chip flags + phy metrics is what aggregates carry. */
+  static const bool dump_all = std::getenv("DEVOURER_RX_DUMP_ALL") != nullptr;
+  if (dump_all) {
+    printf("<devourer-corrupt-any>len=%zu crc_err=%u icv_err=%u "
+           "rate=%u rssi=%d,%d evm=%d,%d snr=%d,%d\n",
+           packet.Data.size(),
+           packet.RxAtrib.crc_err ? 1u : 0u,
+           packet.RxAtrib.icv_err ? 1u : 0u,
+           packet.RxAtrib.data_rate,
+           packet.RxAtrib.rssi[0], packet.RxAtrib.rssi[1],
+           packet.RxAtrib.evm[0], packet.RxAtrib.evm[1],
+           packet.RxAtrib.snr[0], packet.RxAtrib.snr[1]);
+    fflush(stdout);
+  }
   /* TX-validation hook: detect frames whose SA matches the txdemo's hardcoded
    * injected beacon (57:42:75:05:d6:00). When running this RX demo against
    * one adapter while WiFiDriverTxDemo runs against another on the same
diff --git a/tools/precoder/corruption_survey.py b/tools/precoder/corruption_survey.py
new file mode 100644
index 0000000..cdf36c3
--- /dev/null
+++ b/tools/precoder/corruption_survey.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+"""Corruption-pattern survey for FEC design.
+
+Reads `<devourer-corrupt-any>` lines (emitted by `WiFiDriverDemo` with
+`DEVOURER_RX_DUMP_ALL=1` + `DEVOURER_RX_KEEP_CORRUPTED=1`) and aggregates
+descriptive statistics about the corruption rate, frame-size distribution,
+and link-quality distribution — the empirical inputs for choosing the
+right stream-layer FEC scheme.
+
+Workflow:
+
+    sudo DEVOURER_PID=0x0120 DEVOURER_VID=0x2357 DEVOURER_CHANNEL=6 \
+         DEVOURER_RX_DUMP_ALL=1 DEVOURER_RX_KEEP_CORRUPTED=1 \
+         ./build/WiFiDriverDemo | \
+        python3 tools/precoder/corruption_survey.py [--duration 300]
+
+The tool reads stdin until EOF or `--duration` seconds have elapsed, then
+prints a report with:
+
+* Headline counts (total / chip-clean / chip-corrupt; corruption rate %)
+* Frame-size distribution (clean and corrupt separately)
+* Mean / median phy metrics (RSSI / EVM / SNR) for clean and corrupt
+  populations
+* BER-by-SNR-bucket equivalent in this context (per-bucket corruption
+  rate, since we don't have ground truth for arbitrary frames)
+* Burst-vs-isolated stats: per-second time series of corruption events
+  to see whether errors cluster (suggests interference) or spread evenly
+  (suggests sustained marginal SNR)
+
+What this tells the FEC designer:
+
+* If `chip-corrupt` is dominated by a single bucket (e.g. all-or-nothing
+  near sync threshold), inter-frame FEC (Reed-Solomon across N frames
+  with K parity frames) is the right strategy — recovers from K dropped
+  frames, light overhead.
+* If corruption rate scales smoothly with SNR and frames have correctable
+  partial corruption (per-frame BER < 1%), intra-frame FEC (interleaving
+  + light parity) is viable on top.
+* If errors burst (peak rate >5× the median 1-second window), the inter-frame
+  FEC block size N has to be large enough that a burst doesn't exceed K
+  losses inside one block.
+"""
+
+from __future__ import annotations
+
+import argparse
+import collections
+import re
+import select
+import statistics
+import sys
+import time
+from typing import Optional
+
+_CORRUPT_ANY_RE = re.compile(
+    r"<devourer-corrupt-any>len=(?P<len>\d+)\s+"  # frame length
+    r"crc_err=(?P<crc_err>\d+)\s+icv_err=(?P<icv_err>\d+)\s+"  # error flags
+    r"rate=(?P<rate>\d+)\s+"  # rate index (named via rate_names in main)
+    r"rssi=(?P<rssi_a>-?\d+),(?P<rssi_b>-?\d+)\s+"  # path A,B; may be signed
+    r"evm=(?P<evm_a>-?\d+),(?P<evm_b>-?\d+)\s+"  # path A,B; may be signed
+    r"snr=(?P<snr_a>-?\d+),(?P<snr_b>-?\d+)"  # path A,B; may be signed
+)
+
+
+def _len_bucket(n: int) -> str:
+    """Classify a frame length into an 802.11-flavoured size bucket."""
+    # (exclusive upper bound, label) in ascending order; first match wins.
+    tiers = (
+        (64, "  <64 B  (ack/cts/control)"),
+        (256, " 64-255  (mgmt/short data)"),
+        (768, "256-767  (data/probe-resp)"),
+        (1500, "768-1499 (data/aggreg)"),
+    )
+    fallback = ">=1500   (data/jumbo/aggreg)"
+    return next((label for bound, label in tiers if n < bound), fallback)
+
+
+def _snr_bucket(s: int) -> str:
+    lower = s - s % 5  # floor to the bucket's lower edge (negatives included)
+    return f"{lower:>3d}-{lower + 5} dB"
+
+
+def _effective_snr(snr_a: int, snr_b: int) -> int:
+    """Return the larger per-path SNR, as corruption_analysis.py does: the
+    active path on 1T1R (B reads 0), the stronger path on 2T2R single-stream."""
+    return snr_a if snr_a >= snr_b else snr_b
+
+
+def _print_dist(title: str, counter: collections.Counter, total: int) -> None:
+    print(f"\n{title}:\n  {'bucket':<30s} {'count':>8s} {'%':>6s}")
+    denom = max(1, total)  # keep the percentage defined for an empty population
+    for bucket in sorted(counter):
+        count = counter[bucket]
+        print(f"  {bucket:<30s} {count:>8d} {100.0 * count / denom:>5.1f}%")
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Survey <devourer-corrupt-any> lines on stdin; return 0, or 1 if none parsed."""
+    ap = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    ap.add_argument("--duration", type=float, default=0.0,
+                    help="seconds to collect before reporting (default 0 = EOF)")
+    ap.add_argument("--bucket-secs", type=float, default=1.0,
+                    help="time-bucket size for the burst/isolation analysis")
+    args = ap.parse_args(argv)
+    if args.bucket_secs <= 0:  # it is the divisor for the time-bucket index
+        ap.error("--bucket-secs must be positive")
+
+    total = n_clean = n_corrupt = no_snr = 0
+    by_rate = collections.Counter()
+    by_rate_corrupt = collections.Counter()
+    by_len_clean = collections.Counter()
+    by_len_corrupt = collections.Counter()
+    rssi_clean: list[int] = []
+    rssi_corrupt: list[int] = []
+    snr_clean: list[int] = []
+    snr_corrupt: list[int] = []
+    evm_clean: list[int] = []
+    evm_corrupt: list[int] = []
+    # Per-SNR-bucket: total frames + corrupted frames → corruption rate.
+    bucket_total: collections.Counter = collections.Counter()
+    bucket_corrupt: collections.Counter = collections.Counter()
+    # Per-time-bucket corruption count for burst analysis.
+    time_corrupt: collections.Counter = collections.Counter()
+    time_total: collections.Counter = collections.Counter()
+
+    start = time.monotonic()
+    deadline = start + args.duration if args.duration > 0 else None
+
+    for line in sys.stdin:
+        now = time.monotonic()
+        if deadline is not None and now > deadline:
+            break  # checked on every line read; a silent stdin still blocks
+        m = _CORRUPT_ANY_RE.search(line)
+        if not m:
+            continue
+        total += 1
+        crc_err = int(m.group("crc_err"))
+        icv_err = int(m.group("icv_err"))
+        corrupted = bool(crc_err or icv_err)
+        plen = int(m.group("len"))
+        rate = int(m.group("rate"))
+        rssi_a = int(m.group("rssi_a")); rssi_b = int(m.group("rssi_b"))
+        evm_a = int(m.group("evm_a"));   evm_b = int(m.group("evm_b"))
+        snr_a = int(m.group("snr_a"));   snr_b = int(m.group("snr_b"))
+        # The chip only populates evm/snr for OFDM data frames; for CCK ACKs
+        # and short mgmt frames both paths read 0. Treat (0,0) as "no
+        # measurement" rather than "0 dB" so we don't artificially fill the
+        # 0-5 dB SNR bucket with frames that have no measurement at all.
+        snr_present = not (snr_a == 0 and snr_b == 0)
+        evm_present = not (evm_a == 0 and evm_b == 0)
+        eff_snr = _effective_snr(snr_a, snr_b) if snr_present else None
+        eff_rssi = max(rssi_a, rssi_b)
+        eff_evm = min(evm_a, evm_b) if evm_present else None  # most negative
+
+        by_rate[rate] += 1
+        if corrupted:
+            by_rate_corrupt[rate] += 1
+            n_corrupt += 1
+            by_len_corrupt[_len_bucket(plen)] += 1
+            rssi_corrupt.append(eff_rssi)
+            if eff_snr is not None: snr_corrupt.append(eff_snr)
+            if eff_evm is not None: evm_corrupt.append(eff_evm)
+        else:
+            n_clean += 1
+            by_len_clean[_len_bucket(plen)] += 1
+            rssi_clean.append(eff_rssi)
+            if eff_snr is not None: snr_clean.append(eff_snr)
+            if eff_evm is not None: evm_clean.append(eff_evm)
+
+        if eff_snr is None:
+            no_snr += 1
+        else:
+            bucket = _snr_bucket(eff_snr)
+            bucket_total[bucket] += 1
+            if corrupted:
+                bucket_corrupt[bucket] += 1
+        # The temporal bucketing is only meaningful for live captures; if
+        # we're reading a pre-captured file all lines arrive at ~the same
+        # instant. The report block only prints it when elapsed > 1 s.
+        tb = int((now - start) / args.bucket_secs)
+        time_total[tb] += 1
+        if corrupted:
+            time_corrupt[tb] += 1
+
+    elapsed = max(1e-9, time.monotonic() - start)
+    if total == 0:
+        sys.stderr.write("survey: no <devourer-corrupt-any> lines parsed; "
+                         "is DEVOURER_RX_DUMP_ALL set?\n")
+        return 1
+
+    realtime = elapsed > 1.0  # only meaningful when we ran live
+    print(f"=== corruption survey ({total} frames"
+          + (f", {elapsed:.1f}s live" if realtime else ", file/pipe")
+          + ") ===")
+    print(f"chip-clean       : {n_clean:6d} ({100.0 * n_clean / total:5.1f}%)")
+    print(f"chip-corrupt     : {n_corrupt:6d} ({100.0 * n_corrupt / total:5.1f}%)")
+    print(f"corruption rate  : {100.0 * n_corrupt / total:.2f}%")
+    if realtime:
+        print(f"frame rate       : {total / elapsed:.1f} fps")
+    print(f"no-phy-measurement: {no_snr:6d}  "
+          "(CCK/short frames where chip didn't populate snr/evm)")
+
+    # Corruption rate broken down by PHY rate index. Clean traffic is often
+    # dominated by CCK ACKs and beacons which always decode well, so the
+    # headline corruption rate understates what the FEC layer faces at the
+    # OFDM rate the stream link actually uses (index 4 = legacy 6M).
+    rate_names = {0: "1M CCK", 1: "2M CCK", 2: "5.5M CCK", 3: "11M CCK",
+                  4: "6M OFDM", 5: "9M OFDM", 6: "12M OFDM", 7: "18M OFDM",
+                  8: "24M OFDM", 9: "36M OFDM", 10: "48M OFDM", 11: "54M OFDM"}
+    print("\nCorruption rate by DESC_RATE:")
+    print(f"  {'idx':>4s} {'name':<12s} {'count':>8s} {'%':>6s}   "
+          f"{'corrupt':>8s} {'rate':>7s}")
+    for r in sorted(by_rate):
+        name = rate_names.get(r, f"MCS{r - 12}" if r >= 12 else f"rate{r}")
+        pct = 100.0 * by_rate[r] / total
+        crpt = by_rate_corrupt.get(r, 0)
+        cr_pct = 100.0 * crpt / max(1, by_rate[r])
+        print(f"  0x{r:02x} {name:<12s} {by_rate[r]:>8d} {pct:>5.1f}%   "
+              f"{crpt:>8d} {cr_pct:>5.1f}%")
+
+    _print_dist("Frame-size distribution (chip-clean)", by_len_clean, n_clean)
+    if n_corrupt:
+        _print_dist("Frame-size distribution (chip-corrupt)",
+                    by_len_corrupt, n_corrupt)
+
+    def _stats(name: str, xs_clean: list[int], xs_corrupt: list[int]) -> None:
+        def _stat(xs: list[int]) -> str:
+            if not xs:
+                return "n=0"
+            xs = sorted(xs)
+            n = len(xs)
+            return (f"n={n} min={xs[0]} p25={xs[n // 4]} "
+                    f"med={xs[n // 2]} p75={xs[(3 * n) // 4]} max={xs[-1]} "
+                    f"mean={statistics.fmean(xs):+.1f}")
+        print(f"\n{name}:")
+        print(f"  chip-clean    : {_stat(xs_clean)}")
+        print(f"  chip-corrupt  : {_stat(xs_corrupt)}")
+
+    _stats("RSSI (stronger path)", rssi_clean, rssi_corrupt)
+    _stats("EVM  (stronger path; more negative is better)",
+           evm_clean, evm_corrupt)
+    _stats("SNR  (stronger path, dB)", snr_clean, snr_corrupt)
+
+    print("\nCorruption rate by SNR bucket (stronger path, 5-dB buckets):")
+    print("  bucket       frames   corrupt   rate")
+    for bucket in sorted(bucket_total):
+        n = bucket_total[bucket]
+        c = bucket_corrupt[bucket]
+        print(f"  {bucket:>11s}   {n:6d}   {c:7d}   {100.0 * c / max(1, n):5.1f}%")
+
+    if realtime and time_total:
+        print(f"\nTemporal distribution ({args.bucket_secs:.1f}-s buckets, "
+              "top 10 buckets by corruption count):")
+        print("  t-start    total   corrupt   rate")
+        top = time_corrupt.most_common(10)
+        for tb, c in top:
+            t = tb * args.bucket_secs
+            n = time_total[tb]
+            print(f"  {t:6.1f}s   {n:6d}   {c:7d}   "
+                  f"{100.0 * c / max(1, n):5.1f}%")
+        # Burst-vs-baseline heuristic
+        rates = [time_corrupt[tb] / max(1, time_total[tb])
+                 for tb in time_total]
+        if rates:
+            med_rate = statistics.median(rates)
+            max_rate = max(rates)
+            print(f"  median 1-s corruption rate: {100 * med_rate:.1f}%")
+            print(f"  peak   1-s corruption rate: {100 * max_rate:.1f}%")
+            if med_rate > 0 and max_rate > 5 * med_rate:
+                print("  → BURSTY: peak is >5× median, sustained interference "
+                      "or coverage edge events. FEC block size should be "
+                      "large enough to span typical bursts.")
+            elif med_rate < 0.01:
+                print("  → CLEAN: well-conditioned link; FEC overhead can be "
+                      "low / opportunistic.")
+            else:
+                print("  → SUSTAINED: corruption is evenly distributed; light "
+                      "per-frame FEC + cross-frame parity should suffice.")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s int return becomes the exit status