diff --git a/.gitignore b/.gitignore index f678859..c16c46a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -# Debug server state (generated by debug.sh) +# Debug server state (generated by verify.py) .debug/ # Compiled Lua sources @@ -42,3 +42,9 @@ luac.out *.x86_64 *.hex + +# Riptide artifacts (cloud-synced) +.humanlayer/tasks/ + +# Python bytecode cache (verify.py / sandbox.py) +__pycache__/ diff --git a/.pi/skills/factorio-mod-dev/DEBUGGING.md b/.pi/skills/factorio-mod-dev/DEBUGGING.md index 2c48686..9a33f82 100644 --- a/.pi/skills/factorio-mod-dev/DEBUGGING.md +++ b/.pi/skills/factorio-mod-dev/DEBUGGING.md @@ -10,26 +10,37 @@ Errors appear in `factorio-current.log` just after the script checksum lines. ## Protocol -1. `./debug.sh reset` + run `--create` → fix data-stage errors to zero -2. `./debug.sh` → fix control-stage errors (check log after checksum lines) -3. `./debug.sh reset` again → verify `on_init` ran (`storage.creative_mode ~= nil`) -4. Use RCON to inspect live state +1. `uv run verify.py load` → data + control load gate. It runs `--create`, scans + the log for `^Error`, and asserts the control-stage sentinel + `CREATIVE_MOD_CONTROL_OK` is present. `load=FAIL (data/control error)` → + data/control-stage error to fix; `load=FAIL (control stage incomplete)` → the + silent mid-`require` crash (see below). +2. `uv run verify.py behavior` → boots the headless server and asserts `on_init` + ran (`storage_initialized`) and the default state (`default_disabled`). +3. Use `uv run verify.py shell '<cmd>'` to inspect live state interactively. ## Silent control-stage failure If `control.lua` crashes mid-`require`, the game still starts and RCON responds — -but all mod globals are `nil` and `on_init` never fires. Diagnose: +but all mod globals are `nil` and `on_init` never fires. `verify.py load` catches +this via the missing sentinel (`load=FAIL (control stage incomplete)`). 
To +diagnose at runtime, drive the mod's own remote interface (a bare `/c` runs in +the scenario context, not the mod's): ```bash -./rcon.sh '/c rcon.print(tostring(storage.creative_mode ~= nil))' +uv run verify.py shell '/c rcon.print(tostring(pcall(function() return remote.call("creative-mode", "is_enabled") end)))' ``` -`false`/`nil` → look in `factorio-current.log` right after the mod's checksum line. +`false` → the call errored / `on_init` never ran; look in `factorio-current.log` +right after the mod's checksum line. ## Save lifecycle gotcha `--create` with a broken `control.lua` creates the save but skips `on_init`. -After fixing control-stage errors, always `./debug.sh reset` — a server restart alone is not enough. +After fixing control-stage errors, re-run `uv run verify.py load` (it re-runs +`--create` every time, emitting a fresh sentinel) and `uv run verify.py behavior` +to confirm `on_init` ran; use `--clean` to recreate the save from scratch when a +restart alone is not enough. ## RCON caveats @@ -42,9 +53,9 @@ After fixing control-stage errors, always `./debug.sh reset` — a server restar ## Porting to a new Factorio version 1. Bump `factorio_version` and `base >=` in `info.json` -2. Fix data-stage errors (`--create`) -3. Fix control-stage errors (`--start-server`) -4. `./debug.sh reset` — confirm `on_init` ran +2. Fix data-stage errors (`uv run verify.py load`) +3. Fix control-stage errors (`uv run verify.py load` → sentinel present) +4. `uv run verify.py behavior` — confirm `on_init` ran **Reference:** `data/changelog.txt` in the Factorio install lists every API change by version. diff --git a/.pi/skills/factorio-mod-dev/SKILL.md b/.pi/skills/factorio-mod-dev/SKILL.md index 0901c33..ba6cbf2 100644 --- a/.pi/skills/factorio-mod-dev/SKILL.md +++ b/.pi/skills/factorio-mod-dev/SKILL.md @@ -1,6 +1,6 @@ --- name: factorio-mod-dev -description: Develop and debug the creative-mod Factorio mod. 
Use when working on mod scripts, prototypes, GUI, events, or porting to a new Factorio version. Covers repo layout, mod structure, key internals, and the debug toolchain. +description: Develop and debug the creative-mod Factorio mod. Use when working on mod scripts, prototypes, GUI, events, or porting to a new Factorio version. Covers repo layout, mod structure, key internals, and the verify.py verification pipeline. --- # Factorio Mod Dev @@ -21,18 +21,42 @@ creative-mod/ └── locale/ # translations ``` -## Debug toolchain +## Verification loop + +`verify.py` is the canonical way to check the mod. It loads creative-mod in the +local Factorio install, runs assertions, and exits `0`/non-zero with a stable, +greppable `RESULT:` line, so you can edit → verify → read result → iterate. +Run it via `uv`: + +```bash +uv run verify.py doctor # preflight: factorio binary + version, uv, jq +uv run verify.py static # luacheck . + stylua --check . +uv run verify.py load # data + control load gate (incl. silent-crash guard) +uv run verify.py behavior # headless server + RCON assertion batch +uv run verify.py all # static → load → behavior, aggregated +uv run verify.py --help +``` + +The layered model is **static → load → behavior** (cheapest to deepest); `all` +runs the three in sequence. 
Read the result by grepping `^RESULT:` and/or +checking `$?`: + +``` +RESULT: load=PASS # exit 0 +RESULT: load=FAIL (control stage incomplete) # exit non-zero, reason names the failure +``` + +For investigation, use the bounded tooling modes (successors to the removed +standalone shell wrappers): ```bash -./debug.sh # start headless server -./debug.sh gui --window-size 1920x1080 # start with full GUI (windowed) -./debug.sh log # tail factorio-current.log -./debug.sh reset # wipe save → re-triggers on_init -./rcon.sh '/c rcon.print(...)' # one-shot RCON command -./rcon-shell.sh # interactive REPL +uv run verify.py shell '/c rcon.print(game.tick)' # one-shot RCON; omit arg for a stdin REPL +uv run verify.py debug --command '/c ...' # bounded headless session +uv run verify.py debug --gui # manual-only graphical escape hatch +uv run verify.py load --clean # recreate the debug save from scratch ``` -Output channels: +Output channels for the values you inspect: | Goal | Use | Where | |---|---|---| @@ -40,7 +64,9 @@ Output channels: | Trace code | `log("msg")` | `factorio-current.log` | | Dump large table | `helpers.write_file("f", d)` | `.debug/script-output/f` | -→ See `DEBUG.md` for full tool reference. +→ See `VERIFY.md` (this skill folder) for the full subcommand reference, the +`RESULT:`/exit-code contract, and the replicable local install setup. +→ See `DEBUG.md` for the output-channel reference. → See `DEBUGGING.md` (this skill folder) for debugging methodology and porting guide. → See `RELEASE.md` (this skill folder) for release checklist and GitHub Actions workflow reference. diff --git a/.pi/skills/factorio-mod-dev/VERIFY.md b/.pi/skills/factorio-mod-dev/VERIFY.md new file mode 100644 index 0000000..d462bab --- /dev/null +++ b/.pi/skills/factorio-mod-dev/VERIFY.md @@ -0,0 +1,148 @@ +# verify.py — Verification Pipeline Reference + +`verify.py` is the canonical way to check the mod. 
It is a single bounded tool +that loads creative-mod in the local Factorio install, runs assertions, and +exits `0`/non-zero with a stable, greppable `RESULT:` line so an agent can +edit → verify → read result → iterate without a human. **Local only** (not CI). + +Run it via `uv` (it has a PEP 723 inline header; stdlib only, `rcon.py` and +`sandbox.py` imported as local modules): + +```bash +uv run verify.py <subcommand> [flags] +``` + +## The RESULT / exit-code contract + +Every subcommand prints **exactly one** line of the form: + +``` +RESULT: <subcommand>=PASS +RESULT: <subcommand>=FAIL (reason) +``` + +and exits `0` on PASS, non-zero on FAIL. To drive it programmatically, grep for +`^RESULT:` and/or check `$?`. The `reason` names the failing tool / assertion / +phase, e.g.: + +``` +RESULT: static=FAIL (luacheck=PASS stylua=FAIL) +RESULT: load=FAIL (control stage incomplete) +RESULT: behavior=FAIL (assert storage_initialized) +RESULT: all=FAIL (static=PASS load=PASS behavior=FAIL) +``` + +Layered subcommands also print their per-step lines (e.g. `assert +storage_initialized=PASS (...)`) before the final `RESULT:` line, so partial +progress is visible. + +## Subcommands + +| Subcommand | What it does | Layer | +|---|---|---| +| `doctor` | Preflight: Factorio binary + `--version`, `uv`, `jq` on PATH. Distinguishes "install problem" from "mod problem". | preflight | +| `static` | Wraps `luacheck .` + `stylua --check .` (same invocations as `lint.yml`); excludes the gitignored `.debug/` sandbox from luacheck so the local result matches a clean checkout. | static | +| `load` | Bootstraps the `.debug/` sandbox, runs the bounded `--create` data+control stage, scans the log for `^Error`, and asserts the control-stage sentinel `CREATIVE_MOD_CONTROL_OK` is present (guards the silent control-crash case). | load | +| `behavior` | Boots the headless server, polls RCON until it answers, runs a read-only assertion batch, then terminates + reaps under a watchdog. 
| behavior | +| `all` | Runs `static` → `load` → `behavior` in sequence and aggregates into one `RESULT: all=…` line. | all | +| `debug` | Bounded, scriptable headless session: boot server, poll RCON, optional one-shot `--command`, reap under a watchdog. `--gui` is the manual-only graphical escape hatch. | tooling | +| `shell` | Bounded RCON pass-through: one-shot command argument, or stdin REPL (auto-prefixes `/c`). Attaches to a running server or starts a bounded one. | tooling | + +### Flags + +- `static` — no flags. +- `load` — `--clean` (recreate the debug save from scratch; default reuses for a + fast loop), `--timeout <seconds>` (hard timeout for `--create`, default `180`). +- `behavior` / `all` / `debug` / `shell` — `--clean`, `--timeout <seconds>`, and + `--ready-timeout <seconds>` (hard timeout to wait for the server to answer RCON, + default `120`). +- `debug` additionally — `--command '<cmd>'` (one-shot, run once ready) and + `--gui` (manual-only full graphical client; blocks, needs a display). +- `shell` additionally — a positional `command` argument (one-shot); omit it to + read commands from stdin. + +### Examples + +```bash +uv run verify.py doctor +uv run verify.py static +uv run verify.py load +uv run verify.py load --clean +uv run verify.py behavior +uv run verify.py all +uv run verify.py shell '/c rcon.print(game.tick)' +uv run verify.py debug --command '/c rcon.print(tostring(remote.call("creative-mode", "is_enabled")))' +uv run verify.py debug --gui # manual escape hatch +``` + +## The behavior assertion batch (read-only) + +Assertions run in the **mod's** context via the remote interface — a bare `/c` +runs in the *scenario* script context, where `storage` is the scenario's +storage, not creative-mod's. So the batch drives the mod's own interface: + +- `storage_initialized` — `remote.call("creative-mode", "is_enabled")` succeeds + (`on_init` ran to completion → runtime confirmation of the silent-crash guard). 
+- `default_disabled` — that same call returns `false` (creative mode off by + default). + +The batch is fully read-only for now; the GUI-driven "enable all cheats" path is +out of scope (no connected player on a headless server). + +## Replicable local install setup + +The verifier assumes a working local Factorio install is present; it does not +install Factorio. `verify.py doctor` is the runnable companion that confirms the +prerequisites below. To reproduce the environment on a new machine: + +### Factorio binary + +- **Factorio 2.1.7** (full install — the base mods `base`, `elevated-rails`, + `quality`, `space-age` ship with it, so no mod provisioning step is needed). +- The binary path is **fixed and self-located** relative to the mod: + `../../bin/x64/factorio` (resolved from `verify.py`'s own location, exactly as + the old shell launcher derived it from `SCRIPT_DIR`). Nothing is read from + environment variables. + +### Tooling on PATH + +- **`uv`** — runs `verify.py` via its PEP 723 inline header. +- **`jq`** — used in the toolchain (and checked by `doctor`). +- **`stylua`** — Lua formatter; `static` runs `stylua --check .`. It skips + gitignored paths automatically. +- **`luacheck`** — Lua linter; `static` runs `luacheck .`. **It must be built + against Lua 5.3** — it crashes under the system Lua 5.5. 
Install it via + luarocks pinned to Lua 5.3, into the per-user tree: + + ```bash + luarocks --lua-version=5.3 install luacheck --local + ``` + + This puts the `luacheck` binary under `~/.luarocks/bin`, which **must be on + `PATH`** for `verify.py static` (and `doctor`'s assumptions) to find it: + + ```bash + export PATH="$HOME/.luarocks/bin:$PATH" + ``` + +### Sandbox paths (created by `sandbox.py`) + +`verify.py` stands up an isolated `.debug/` sandbox next to the mod (gitignored, +absent in CI): + +``` +.debug/ +├── config/config.ini # read-data → factorio/data, write-data → .debug/ +├── mods/ +│ ├── creative-mod_<version> → ../../ (symlink to the live working tree) +│ ├── mod-list.json # copied from mods_dev/ +│ └── mod-settings.dat # copied from mods_dev/ (if present) +├── saves/debug-save.zip # the live debug save +├── factorio-current.log # game + Lua log +├── console.log # server console / RCON command log +└── script-output/ # helpers.write_file() output +``` + +The live-tree symlink means edits are instant (no packaging step). The symlink is +re-pointed each run to the current version, and stale differently-versioned +symlinks are pruned. RCON runs on port `27015` with password `factorio-debug`. diff --git a/DEBUG.md b/DEBUG.md index 2cf1616..f5d4687 100644 --- a/DEBUG.md +++ b/DEBUG.md @@ -2,37 +2,34 @@ Headless debug setup for **creative-mod**. No GUI, no Steam, isolated from `~/.factorio`. +All debugging now goes through `verify.py` (run via `uv`). It replaces the old +standalone shell wrappers with one bounded tool. +→ See `.pi/skills/factorio-mod-dev/VERIFY.md` for the full reference. 
+ ## Quick Start ```bash -# Terminal 1 — start the server (keeps running) -./debug.sh +# Verify the mod loads and behaves (bounded — always returns) +uv run verify.py load +uv run verify.py behavior +uv run verify.py all -# Terminal 2 — inspect values -./rcon.sh '/c rcon.print(game.tick)' -./rcon.sh '/c rcon.print(serpent.block(storage))' +# Inspect values via a one-shot RCON command +uv run verify.py shell '/c rcon.print(game.tick)' +uv run verify.py shell '/c rcon.print(serpent.block(storage))' -# Terminal 3 — watch the game log -./debug.sh log +# Watch the game log +tail -f .debug/factorio-current.log ``` -## Scripts +## Tools -| Script | Purpose | +| Tool | Purpose | |---|---| -| `debug.sh` | Start headless Factorio server with RCON | -| `rcon.sh ''` | Send one RCON command, print response | -| `rcon-shell.sh` | Interactive REPL (`factorio>` prompt) | -| `rcon.py` | Python RCON client (used by the above) | - -### `debug.sh` subcommands - -```bash -./debug.sh # start server (blocks until Ctrl-C) -./debug.sh log # tail .debug/factorio-current.log -./debug.sh console # tail .debug/console.log -./debug.sh reset # delete the save and recreate it (wipes state) -``` +| `uv run verify.py shell '<cmd>'` | Send one RCON command, print response (omit arg for a stdin REPL) | +| `uv run verify.py debug` | Bounded headless session; `--command` one-shot, `--gui` graphical escape hatch | +| `uv run verify.py load --clean` | Recreate the debug save from scratch (wipes state) | +| `rcon.py` | Python RCON client (imported as a module by `verify.py`; also a standalone CLI) | ## Output Channels @@ -61,8 +58,6 @@ Best for tracing code paths inside the mod. 
Same file Factorio writes Lua errors Tail it live: ```bash -./debug.sh log -# or tail -f .debug/factorio-current.log ``` @@ -82,15 +77,20 @@ cat .debug/script-output/storage.txt ## Common Debug Patterns +> Note: a bare `/c` runs in the **scenario** script context, where `storage` is +> the scenario's storage — not creative-mod's per-mod storage. To read the mod's +> own state, drive its remote interface, e.g. +> `remote.call("creative-mode", "is_enabled")`. + ### Inspect the global storage table ```bash -./rcon.sh '/c rcon.print(serpent.block(storage))' -./rcon.sh '/c rcon.print(serpent.block(storage.creative_mode))' +uv run verify.py shell '/c rcon.print(serpent.block(storage))' +uv run verify.py shell '/c rcon.print(serpent.block(storage.creative_mode))' ``` ### Check if an entity exists in a surface ```bash -./rcon.sh '/c +uv run verify.py shell '/c local e = game.surfaces[1].find_entities_filtered{name="creative-chest"} rcon.print("#entities: " .. #e) ' @@ -98,23 +98,23 @@ cat .debug/script-output/storage.txt ### Trigger a mod event manually ```bash -./rcon.sh '/c script.raise_event(defines.events.on_player_created, {player_index=1})' +uv run verify.py shell '/c script.raise_event(defines.events.on_player_created, {player_index=1})' ``` ### Check a player's state (after joining via multiplayer) ```bash -./rcon.sh '/c rcon.print(serpent.block(game.players[1]))' +uv run verify.py shell '/c rcon.print(serpent.block(game.players[1]))' ``` ### Dump all mod settings ```bash -./rcon.sh '/c rcon.print(serpent.block(settings.startup))' -./rcon.sh '/c rcon.print(serpent.block(settings.global))' +uv run verify.py shell '/c rcon.print(serpent.block(settings.startup))' +uv run verify.py shell '/c rcon.print(serpent.block(settings.global))' ``` ### Watch for Lua errors ```bash -./debug.sh log # errors appear as: "Error while running event ..." +tail -f .debug/factorio-current.log # errors appear as: "Error while running event ..." 
``` ## How the Setup Works @@ -134,7 +134,7 @@ cat .debug/script-output/storage.txt - The symlink means **live edits are instant** — no packaging step needed. - The save persists across server restarts (Factorio saves on SIGTERM). -- Use `./debug.sh reset` to recreate the save from scratch (clears all `storage`). +- Use `uv run verify.py load --clean` to recreate the save from scratch (clears all `storage`). ## First-Run Achievement Warning @@ -145,6 +145,15 @@ The first `/c` command in a fresh game triggers: ## Requirements -- `jq` — reads `info.json` for mod name/version -- `python3` — runs the RCON client (`rcon.py`) -- Factorio binary at `../../bin/x64/factorio` relative to repo root +`uv run verify.py doctor` checks these. See +`.pi/skills/factorio-mod-dev/VERIFY.md` for the full replicable install setup. + +- **Factorio 2.1.7** binary at `../../bin/x64/factorio` relative to the mod (full + install — base mods ship with it). +- **`uv`** — runs `verify.py`. +- **`jq`** — reads `info.json` for mod name/version. +- **`stylua`** — Lua formatter (`verify.py static`). +- **`luacheck`** — Lua linter (`verify.py static`); **must be built against Lua + 5.3** (it crashes under Lua 5.5). Install via + `luarocks --lua-version=5.3 install luacheck --local` and add `~/.luarocks/bin` + to `PATH`. diff --git a/control.lua b/control.lua index e933562..fc04f10 100644 --- a/control.lua +++ b/control.lua @@ -79,3 +79,9 @@ remote.add_interface(creative_mode_defines.names.interface, remote_interface.rem cheats.enable_or_disable_creative_mode(player, true, false, false, false) end end)]] + +-- Verification sentinel: emitted only if the control stage parses to completion +-- (after every require + all event registration). A mid-require crash leaves +-- this line absent, which verify.py greps factorio-current.log for to detect the +-- documented "starts but storage.creative_mode is nil" silent failure. 
+log("CREATIVE_MOD_CONTROL_OK") diff --git a/debug.sh b/debug.sh deleted file mode 100644 index d3c5cbc..0000000 --- a/debug.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env bash -# debug.sh — Launch Factorio headless with creative-mod for debugging -# -# Usage: -# ./debug.sh — start headless server (blocks; Ctrl-C to stop) -# ./debug.sh reset — delete the debug save and recreate it -# ./debug.sh log — tail the game log (while server is running) -# ./debug.sh console — tail the console log (game.print output) -# -# Output channels: -# .debug/factorio-current.log — all Lua errors, log() calls, engine messages -# .debug/factorio-current.log — Lua log() calls and engine messages -# .debug/console.log — raw server console (commands, warnings) -# .debug/script-output/ — game.write_file() output -# -# After the server is up, use rcon.sh or rcon.py to send commands: -# ./rcon.sh '/c rcon.print(game.tick)' — inspect value (echoed back) -# ./rcon.sh '/c rcon.print(serpent.block(storage))' — inspect table -# ./rcon.sh '/c log("hello")' — write to factorio-current.log - -set -euo pipefail - -# --------------------------------------------------------------------------- -# Paths -# --------------------------------------------------------------------------- -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -FACTORIO_BIN="$(realpath "$SCRIPT_DIR/../../bin/x64/factorio")" -MODS_DEV_DIR="$(realpath "$SCRIPT_DIR/..")" # mods_dev/ (parent of this repo) - -VERSION=$(jq -r .version "$SCRIPT_DIR/info.json") -MOD_NAME=$(jq -r .name "$SCRIPT_DIR/info.json") -VERSIONED_NAME="${MOD_NAME}_${VERSION}" - -DEBUG_DIR="$SCRIPT_DIR/.debug" -MODS_DIR="$DEBUG_DIR/mods" -SAVES_DIR="$DEBUG_DIR/saves" -CONFIG_DIR="$DEBUG_DIR/config" -SAVE_FILE="$SAVES_DIR/debug-save.zip" -CONSOLE_LOG="$DEBUG_DIR/console.log" -GAME_LOG="$DEBUG_DIR/factorio-current.log" -SCRIPT_OUTPUT="$DEBUG_DIR/script-output" - -RCON_PORT=27015 -RCON_PASS="factorio-debug" - -# 
--------------------------------------------------------------------------- -# Subcommands -# --------------------------------------------------------------------------- -case "${1:-}" in - log) - echo "Tailing game log: $GAME_LOG" - exec tail -f "$GAME_LOG" - ;; - console) - echo "Tailing console log: $CONSOLE_LOG" - exec tail -f "$CONSOLE_LOG" - ;; - reset) - echo "Deleting save: $SAVE_FILE" - rm -f "$SAVE_FILE" - echo "Done. Run ./debug.sh to recreate and start." - exit 0 - ;; - gui) - # Launch with full GUI (singleplayer, no headless) - shift - exec "$FACTORIO_BIN" \ - --config "$CONFIG_DIR/config.ini" \ - --mod-directory "$MODS_DIR" \ - --load-game "$SAVE_FILE" \ - "$@" - ;; -esac - -# --------------------------------------------------------------------------- -# Setup -# --------------------------------------------------------------------------- -echo "=== Factorio Debug Launcher ===" -echo "Binary : $FACTORIO_BIN" -echo "Mod : $VERSIONED_NAME" -echo "Debug : $DEBUG_DIR" -echo "" - -mkdir -p "$MODS_DIR" "$SAVES_DIR" "$CONFIG_DIR" "$SCRIPT_OUTPUT" - -# Symlink the live mod source into the debug mods dir (versioned name required) -if [ ! 
-L "$MODS_DIR/$VERSIONED_NAME" ]; then - ln -sfn "$SCRIPT_DIR" "$MODS_DIR/$VERSIONED_NAME" - echo "Symlinked mod: $MODS_DIR/$VERSIONED_NAME -> $SCRIPT_DIR" -fi - -# Always refresh the symlink target in case version changed -ln -sfn "$SCRIPT_DIR" "$MODS_DIR/$VERSIONED_NAME" - -# Copy mod-list.json from mods_dev (enables creative-mod, disables others) -cp "$MODS_DEV_DIR/mod-list.json" "$MODS_DIR/mod-list.json" - -# Copy mod-settings.dat if present -if [ -f "$MODS_DEV_DIR/mod-settings.dat" ]; then - cp "$MODS_DEV_DIR/mod-settings.dat" "$MODS_DIR/mod-settings.dat" -fi - -# Write a minimal config.ini pointing write-data at .debug/ -cat > "$CONFIG_DIR/config.ini" << EOF -; Factorio debug config — generated by debug.sh -; https://wiki.factorio.com/Factorio_wiki -[path] -read-data=__PATH__executable__/../../data -write-data=$DEBUG_DIR -EOF - -# --------------------------------------------------------------------------- -# Create save if missing -# --------------------------------------------------------------------------- -if [ ! -f "$SAVE_FILE" ]; then - echo "Creating new debug save..." - "$FACTORIO_BIN" \ - --config "$CONFIG_DIR/config.ini" \ - --mod-directory "$MODS_DIR" \ - --create "$SAVE_FILE" \ - --disable-audio \ - 2>&1 | tee -a "$GAME_LOG" - echo "Save created: $SAVE_FILE" -fi - -# --------------------------------------------------------------------------- -# Start headless server -# --------------------------------------------------------------------------- -echo "" -echo "Starting headless server..." 
-echo " RCON port : $RCON_PORT" -echo " RCON password: $RCON_PASS" -echo " Game log : $GAME_LOG" -echo " Console log : $CONSOLE_LOG" -echo " Script output: $SCRIPT_OUTPUT" -echo "" -echo "Output channels:" -echo " rcon.print(v) — value echoed back to rcon.sh ← best for inspection" -echo " log(\"msg\") — goes to factorio-current.log" -echo " helpers.write_file(f, data) — goes to .debug/script-output/ (Factorio 2.0)" -echo "" -echo "Quick commands:" -echo " ./rcon.sh '/c rcon.print(game.tick)' — inspect a value" -echo " ./rcon.sh '/c rcon.print(serpent.block(storage))'— pretty-print global table" -echo " ./rcon.sh '/c log(\"hello\")' — write to game log" -echo " ./debug.sh log — tail the game log" -echo " ./rcon-shell.sh — interactive REPL" -echo "" - -exec "$FACTORIO_BIN" \ - --config "$CONFIG_DIR/config.ini" \ - --mod-directory "$MODS_DIR" \ - --start-server "$SAVE_FILE" \ - --rcon-port "$RCON_PORT" \ - --rcon-password "$RCON_PASS" \ - --console-log "$CONSOLE_LOG" \ - --disable-audio diff --git a/rcon-shell.sh b/rcon-shell.sh deleted file mode 100644 index 37bfb1b..0000000 --- a/rcon-shell.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash -# rcon-shell.sh — Interactive RCON REPL for the Factorio debug server. -# -# Usage: -# ./rcon-shell.sh -# -# Tips: -# game.print("msg") → output to .debug/console.log -# log("msg") → output to .debug/factorio-current.log -# serpent.block(t) → pretty-print any table -# -# To watch output in another terminal: -# ./debug.sh log → tail factorio-current.log -# ./debug.sh console → tail console.log - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -RCON_PORT=27015 -RCON_PASS="factorio-debug" - -# Check server is reachable -if ! 
python3 "$SCRIPT_DIR/rcon.py" --port "$RCON_PORT" --password "$RCON_PASS" '/c ' &>/dev/null; then - echo "ERROR: Cannot connect to Factorio RCON at localhost:$RCON_PORT" - echo "Make sure the debug server is running: ./debug.sh" - exit 1 -fi - -echo "=== Factorio RCON Shell ===" -echo "Server: localhost:$RCON_PORT" -echo "Commands prefixed with /c run Lua. Type 'exit' or Ctrl-D to quit." -echo "" -echo "Tips:" -echo " rcon.print(value) -- value echoed back here ← best for inspection" -echo " log(\"msg\") -- writes to factorio-current.log" -echo " log(serpent.block(t)) -- pretty-print any table to game log" -echo " helpers.write_file(\"f\", v) -- write to .debug/script-output/f (Factorio 2.0)" -echo "" - -while true; do - printf "factorio> " - if ! read -r line; then - echo "" - break - fi - [ "$line" = "exit" ] && break - [ -z "$line" ] && continue - - # Auto-prepend /c if user typed raw Lua (no leading /) - if [[ "$line" != /* ]]; then - cmd="/c $line" - else - cmd="$line" - fi - - result=$(python3 "$SCRIPT_DIR/rcon.py" \ - --port "$RCON_PORT" \ - --password "$RCON_PASS" \ - "$cmd" 2>&1) || true - - if [ -n "$result" ]; then - echo "$result" - fi -done - -echo "Bye." 
diff --git a/rcon.py b/rcon.py index fc80965..5f1e249 100644 --- a/rcon.py +++ b/rcon.py @@ -8,7 +8,7 @@ Output channels — choose the right one for your use case: rcon.print(value) → echoed back as RCON response (shown by this script) - log("msg") → .debug/factorio-current.log (tail: ./debug.sh log) + log("msg") → .debug/factorio-current.log (uv run verify.py load reads it) helpers.write_file("f", data) → .debug/script-output/f (Factorio 2.0 API) Examples: @@ -76,7 +76,7 @@ def rcon_exec(host: str, port: int, password: str, command: str) -> str: except ConnectionRefusedError: print( f"ERROR: Cannot connect to {host}:{port} — is the server running?\n" - "Start it with: ./debug.sh", + "Start it with: uv run verify.py debug", file=sys.stderr, ) sys.exit(1) diff --git a/rcon.sh b/rcon.sh deleted file mode 100644 index cce25cc..0000000 --- a/rcon.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -# rcon.sh — Send a single RCON command to the Factorio debug server. -# -# Usage: -# ./rcon.sh '/c game.print("hello")' -# ./rcon.sh '/c log(serpent.block(storage))' -# ./rcon.sh '/c game.print(game.tick)' -# -# game.print() output → .debug/console.log (tail with: ./debug.sh console) -# log() output → .debug/factorio-current.log (tail with: ./debug.sh log) -# -# Run ./rcon-shell.sh for an interactive REPL. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -if [ $# -eq 0 ]; then - echo "Usage: $0 ''" - echo "Examples:" - echo " $0 '/c game.print(\"hello world\")'" - echo " $0 '/c log(serpent.block(storage))'" - exit 1 -fi - -exec python3 "$SCRIPT_DIR/rcon.py" "$@" diff --git a/sandbox.py b/sandbox.py new file mode 100644 index 0000000..261af0a --- /dev/null +++ b/sandbox.py @@ -0,0 +1,229 @@ +""" +sandbox.py — the single source of sandbox truth for verify.py. 
+ +Owns the ``.debug/`` sandbox bootstrap (the mkdir tree, the live working-tree +symlink, the copied mod-list.json / mod-settings.dat, and the generated +config.ini) so every verify.py layer (load, behavior, debug) stands up exactly +the same isolated Factorio environment without re-implementing it or drifting. + +Paths are derived from this file's own location (its parent is the mod root); +nothing is read from environment variables. +""" + +import json +import subprocess +from dataclasses import dataclass +from pathlib import Path + +# --------------------------------------------------------------------------- +# Self-locating paths (derived from this file's own location) +# --------------------------------------------------------------------------- +ROOT = Path(__file__).resolve().parent +FACTORIO_BIN = (ROOT / ".." / ".." / "bin" / "x64" / "factorio").resolve() +MODS_DEV_DIR = (ROOT / "..").resolve() +INFO = json.loads((ROOT / "info.json").read_text()) +VERSIONED_NAME = f"{INFO['name']}_{INFO['version']}" + +DEBUG_DIR = ROOT / ".debug" +MODS_DIR = DEBUG_DIR / "mods" +SAVES_DIR = DEBUG_DIR / "saves" +CONFIG_DIR = DEBUG_DIR / "config" +SCRIPT_OUTPUT_DIR = DEBUG_DIR / "script-output" + +SAVE_FILE = SAVES_DIR / "debug-save.zip" +CONFIG_FILE = CONFIG_DIR / "config.ini" +GAME_LOG = DEBUG_DIR / "factorio-current.log" +CONSOLE_LOG = DEBUG_DIR / "console.log" + +RCON_PORT = 27015 +RCON_PASS = "factorio-debug" + +CONFIG_TEMPLATE = """\ +; Factorio debug config — generated by sandbox.py (verify.py) +; https://wiki.factorio.com/Factorio_wiki +[path] +read-data=__PATH__executable__/../../data +write-data={write_data} +""" + + +@dataclass(frozen=True) +class Sandbox: + """Resolved paths for the bootstrapped ``.debug/`` sandbox.""" + + debug_dir: Path + mods_dir: Path + saves_dir: Path + config_dir: Path + config_file: Path + save_file: Path + game_log: Path + console_log: Path + script_output_dir: Path + mod_symlink: Path + rcon_port: int + rcon_password: str + + +def 
bootstrap_sandbox(clean: bool = False) -> Sandbox: + """Stand up the ``.debug/`` sandbox. + + - Creates the mods/saves/config/script-output tree. + - Refreshes the live working tree symlink at ``.debug/mods/`` + (always re-pointed in case the version changed; stale differently-versioned + symlinks are removed so only the current version is present). + - Copies ``../mod-list.json`` (required) and ``../mod-settings.dat`` (optional) + into the sandbox mods dir. + - Writes the generated ``config.ini`` pointing write-data at ``.debug/``. + + When ``clean`` is True the existing save is deleted so the next ``--create`` + rebuilds it from scratch. + """ + for directory in (MODS_DIR, SAVES_DIR, CONFIG_DIR, SCRIPT_OUTPUT_DIR): + directory.mkdir(parents=True, exist_ok=True) + + # Remove stale, differently-versioned creative-mod symlinks so the sandbox + # only ever exposes the current version. + prefix = f"{INFO['name']}_" + for entry in MODS_DIR.iterdir(): + if entry.name.startswith(prefix) and entry.name != VERSIONED_NAME and entry.is_symlink(): + entry.unlink() + + # Always (re)point the live-tree symlink (an idempotent `ln -sfn`). + mod_symlink = MODS_DIR / VERSIONED_NAME + if mod_symlink.is_symlink() or mod_symlink.exists(): + mod_symlink.unlink() + mod_symlink.symlink_to(ROOT) + + # Copy mod-list.json (required) and mod-settings.dat (optional). + src_mod_list = MODS_DEV_DIR / "mod-list.json" + if not src_mod_list.is_file(): + raise FileNotFoundError(f"mod-list.json not found at {src_mod_list}") + (MODS_DIR / "mod-list.json").write_bytes(src_mod_list.read_bytes()) + + src_mod_settings = MODS_DEV_DIR / "mod-settings.dat" + if src_mod_settings.is_file(): + (MODS_DIR / "mod-settings.dat").write_bytes(src_mod_settings.read_bytes()) + + # Write the generated config.ini (write-data -> .debug/). 
def _captured_text(stream: str | bytes | None) -> str:
    """Return one captured subprocess stream as text (``None`` becomes ``""``)."""
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        return stream.decode("utf-8", "replace")
    return stream


def run_create(sandbox: Sandbox, timeout: float) -> str:
    """Run Factorio's ``--create`` step under a hard timeout and return its output.

    Invokes ``FACTORIO_BIN`` with the sandbox's config file and mod directory,
    ``--create <save_file>`` and ``--disable-audio``. Stdin is detached and
    stdout/stderr are captured, so the returned text comes from this run only
    and not from a log file left behind by an earlier run.

    Args:
        sandbox: Supplies ``config_file``, ``mods_dir`` and ``save_file``.
        timeout: Seconds to wait before giving up on the child process.

    Returns:
        On normal completion: captured stdout, a newline, then captured stderr.
        On timeout: whatever output had been captured so far, followed by the
        line ``Error verify.py: --create timed out after <timeout>s``.
    """
    argv = [
        str(FACTORIO_BIN),
        "--config",
        str(sandbox.config_file),
        "--mod-directory",
        str(sandbox.mods_dir),
        "--create",
        str(sandbox.save_file),
        "--disable-audio",
    ]
    try:
        proc = subprocess.run(
            argv,
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        # TimeoutExpired carries raw bytes (or None when nothing was read) even
        # with text=True, so normalize each stream separately. Concatenating
        # first would mix bytes with "" and raise TypeError.
        captured = _captured_text(exc.stdout) + _captured_text(exc.stderr)
        return captured + f"\nError verify.py: --create timed out after {timeout}s\n"

    return (proc.stdout or "") + "\n" + (proc.stderr or "")
+ """ + return subprocess.Popen( # noqa: S603 — fixed, self-located argv; no shell + [ + str(FACTORIO_BIN), + "--config", + str(sandbox.config_file), + "--mod-directory", + str(sandbox.mods_dir), + "--load-game", + str(sandbox.save_file), + ], + ) diff --git a/verify.py b/verify.py new file mode 100644 index 0000000..ac81534 --- /dev/null +++ b/verify.py @@ -0,0 +1,611 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [] +# /// +""" +verify.py — Local agent-driven verification pipeline for creative-mod. + +A single bounded tool that loads the mod in the maintainer's local Factorio +install, runs assertions, and exits 0/non-zero with a stable, greppable +``RESULT:`` summary so an autonomous agent can edit -> verify -> read result -> +iterate without a human. + +Run it via uv: + + uv run verify.py doctor + uv run verify.py static + uv run verify.py --help + +This is local-only tooling (not CI). Paths are derived from this file's own +location, like the old shell launcher derived them from SCRIPT_DIR; nothing is read +from environment variables. + +Subcommands: + doctor Preflight: Factorio binary + version, uv, jq on PATH. + static Wrap luacheck . and stylua --check . (same invocations as lint.yml). + load (Phase 2) data + control load gate. + behavior (Phase 3) headless server + RCON assertion batch. + all (Phase 3) static -> load -> behavior in sequence. + debug Bounded scriptable debug session (--command one-shot, --gui escape hatch). + shell Bounded RCON pass-through (one-shot arg, or stdin REPL with /c auto-prefix). + +Result contract: + Every subcommand prints exactly one ``RESULT: =PASS`` or + ``RESULT: =FAIL (reason)`` line and exits 0 on success / non-zero on + failure. 
def result(name: str, ok: bool, detail: str = "") -> int:
    """Emit the greppable ``RESULT:`` line for *name* and return the exit code.

    When *ok* is true, prints ``RESULT: <name>=PASS`` and returns 0.
    Otherwise prints ``RESULT: <name>=FAIL`` and returns 1; a non-empty
    *detail* is appended as `` (<detail>)``.
    """
    if not ok:
        reason = f" ({detail})" if detail else ""
        print(f"RESULT: {name}=FAIL{reason}")
        return 1
    print(f"RESULT: {name}=PASS")
    return 0
+ factorio_version: str | None = None + if not FACTORIO_BIN.exists(): + problems.append(f"factorio binary missing at {FACTORIO_BIN}") + elif not FACTORIO_BIN.is_file(): + problems.append(f"factorio binary not a file: {FACTORIO_BIN}") + else: + try: + proc = subprocess.run( + [str(FACTORIO_BIN), "--version"], + capture_output=True, + text=True, + timeout=30, + ) + except PermissionError: + problems.append(f"factorio binary not executable: {FACTORIO_BIN}") + except (OSError, subprocess.TimeoutExpired) as exc: + problems.append(f"factorio --version failed: {exc}") + else: + if proc.returncode != 0: + problems.append(f"factorio --version exited {proc.returncode}") + else: + # First line looks like: "Version: 2.1.7 (build ..., linux64, full)" + first = proc.stdout.strip().splitlines()[0] if proc.stdout.strip() else "" + factorio_version = first.split("Version:", 1)[-1].strip() if "Version:" in first else first + print(f"factorio: {factorio_version or '(version unknown)'} [{FACTORIO_BIN}]") + + # uv and jq must be on PATH. + for tool in ("uv", "jq"): + path = shutil.which(tool) + if path is None: + problems.append(f"{tool} not on PATH") + else: + print(f"{tool}: {path}") + + if problems: + return result("doctor", False, "; ".join(problems)) + return result("doctor", True) + + +# --------------------------------------------------------------------------- +# static — wrap luacheck + stylua --check (same invocations as lint.yml) +# --------------------------------------------------------------------------- +def _run_tool(tool: str, tool_args: list[str]) -> tuple[bool, str]: + """Run a static-analysis tool from the repo root. + + Returns (ok, detail). A missing tool is treated as a failure with a clear + reason so the agent can distinguish "tool not installed" from "lint error". 
def cmd_load(args: argparse.Namespace) -> int:
    """Cheap data + control load gate.

    Bootstraps the ``.debug/`` sandbox, runs the bounded ``--create`` stage and
    evaluates the text that ``sandbox.run_create`` returns:

    - any line starting with ``Error`` (optionally preceded by a log timestamp)
      -> ``load=FAIL (data/control error: <line>)``
    - sentinel ``CREATIVE_MOD_CONTROL_OK`` absent
      -> ``load=FAIL (control stage incomplete)``
    - otherwise -> ``load=PASS``

    Args:
        args: Parsed CLI namespace; reads ``timeout`` and, if present, ``clean``.

    Returns:
        The exit code produced by ``result`` (0 on PASS, 1 on FAIL).
    """
    sb = sandbox.bootstrap_sandbox(clean=getattr(args, "clean", False))
    log = sandbox.run_create(sb, timeout=args.timeout)

    # Scan the text rather than trusting the exit status. Log lines look like
    # "  12.345 Error ...", but the timestamp is optional on purpose: the
    # timeout marker appended by sandbox.run_create ("Error verify.py: ...")
    # has none. With a mandatory timestamp, a timed-out run that had already
    # printed the sentinel would be reported as PASS.
    error = re.search(r"^\s*(?:[\d.:]+\s*)?Error.*$", log, re.M)
    if error is not None:
        return result("load", False, f"data/control error: {error.group(0).strip()}")

    if "CREATIVE_MOD_CONTROL_OK" not in log:
        return result("load", False, "control stage incomplete")

    return result("load", True)
def _terminate_server(server: subprocess.Popen) -> None:
    """Stop the server's process group and reap it (SIGTERM, then SIGKILL).

    Signals the process group of ``server.pid`` so children go down with it,
    waiting up to 10 seconds after each signal. The exit status is collected
    on every path, including when the group has already vanished, and any
    pipes the ``Popen`` object still holds are closed. Safe to call on a
    process that has already exited.

    Args:
        server: Handle of the process to stop.
    """
    try:
        if server.poll() is not None:
            server.wait()  # already exited: just collect the status
            return
        try:
            pgid = os.getpgid(server.pid)
            for sig in (signal.SIGTERM, signal.SIGKILL):
                os.killpg(pgid, sig)
                try:
                    server.wait(timeout=10)
                    return
                except subprocess.TimeoutExpired:
                    continue  # escalate to the next signal (or give up)
        except ProcessLookupError:
            # The group disappeared between poll() and the signal. Still
            # collect the exit status instead of returning un-reaped.
            try:
                server.wait(timeout=10)
            except subprocess.TimeoutExpired:
                pass
    finally:
        # Release the pipe ends we own so descriptors are not leaked.
        for stream in (server.stdin, server.stdout, server.stderr):
            if stream is not None and not stream.closed:
                stream.close()
def cmd_behavior(args: argparse.Namespace) -> int:
    """Boot the headless server, wait for RCON, and run the read-only checks.

    Both checks go through the mod's own remote interface
    (``remote.call("creative-mode", "is_enabled")``) rather than reading
    ``storage`` from a bare ``/c`` command:

    - ``storage_initialized``: the call, wrapped in ``pcall``, reports ``true``.
    - ``default_disabled``: the call itself returns ``false``.

    Every check is executed even if an earlier one fails. The server is
    terminated in a ``finally`` block, so it is stopped on every exit path.

    Args:
        args: Parsed CLI namespace; reads ``timeout``, ``ready_timeout`` and,
            if present, ``clean``.

    Returns:
        The exit code produced by ``result`` (0 on PASS, 1 on FAIL).
    """
    # (assertion name, RCON command, expected trimmed response)
    checks = (
        (
            "storage_initialized",
            '/c rcon.print(tostring(pcall(function() '
            'return remote.call("creative-mode", "is_enabled") end)))',
            "true",
        ),
        (
            "default_disabled",
            '/c rcon.print(tostring(remote.call("creative-mode", "is_enabled")))',
            "false",
        ),
    )

    sb = sandbox.bootstrap_sandbox(clean=getattr(args, "clean", False))
    # --start-server needs an existing save; create one when it is missing
    # (including right after a --clean).
    if not sb.save_file.exists():
        sandbox.run_create(sb, timeout=args.timeout)

    server = sandbox.start_server(sb)
    try:
        if not _poll_rcon_ready(sb, server, time.monotonic() + args.ready_timeout):
            return result("behavior", False, "server not ready")
        # The comprehension visits every check in order; it does not stop at
        # the first failure.
        failed = [
            check_name
            for check_name, lua, expected in checks
            if not _assert_rcon(sb, lua, expected, check_name)
        ]
    finally:
        _terminate_server(server)

    if failed:
        return result("behavior", False, "assert " + ", ".join(failed))
    return result("behavior", True)
def _server_is_up(sb: sandbox.Sandbox) -> bool:
    """Report whether an RCON server already answers on the sandbox port.

    Sends the trivial command ``/c rcon.print(1)`` to ``localhost`` using the
    sandbox's RCON port and password. ``sys.stderr`` is pointed at the null
    device for the duration of the probe and restored afterwards, so a failed
    probe prints nothing.

    Returns:
        True if the command round-trips; False if the probe raises ``OSError``
        (which covers connection and timeout errors) or ``SystemExit``.
    """
    original_stderr = sys.stderr
    with open(os.devnull, "w") as sink:
        sys.stderr = sink
        try:
            rcon.rcon_exec("localhost", sb.rcon_port, sb.rcon_password, "/c rcon.print(1)")
        except (OSError, SystemExit):
            # ConnectionError, ConnectionRefusedError and TimeoutError are all
            # OSError subclasses, so this is the same set the probe tolerates.
            return False
        finally:
            sys.stderr = original_stderr
    return True
def cmd_shell(args: argparse.Namespace) -> int:
    """Bounded RCON pass-through: one-shot command or stdin-driven loop.

    If ``args.command`` is given, that single command is sent and its response
    printed. Otherwise commands are read from stdin one per line until EOF or
    a line reading ``exit``. Blank lines are skipped, and any line not starting
    with ``/`` is sent as ``/c <line>``.

    If no server answers on the sandbox port, one is started for the duration
    of the call (creating the save first if it is missing) and terminated on
    exit. A server that was already running is left alone.

    Args:
        args: Parsed CLI namespace; reads ``command``, ``timeout``,
            ``ready_timeout`` and, if present, ``clean``.

    Returns:
        The exit code produced by ``result`` (0 on PASS, 1 on FAIL).
    """
    sb = sandbox.bootstrap_sandbox(clean=getattr(args, "clean", False))

    owned_server: subprocess.Popen | None = None
    try:
        if not _server_is_up(sb):
            if not sb.save_file.exists():
                sandbox.run_create(sb, timeout=args.timeout)
            owned_server = sandbox.start_server(sb)
            if not _poll_rcon_ready(sb, owned_server, time.monotonic() + args.ready_timeout):
                return result("shell", False, "server not ready")

        # One-shot mode.
        if args.command is not None:
            sent, reply = _send_command(sb, args.command)
            if sent and reply.strip():
                print(reply.rstrip("\n"))
            return result("shell", sent, "" if sent else reply)

        # Piped / interactive mode.
        failures = 0
        for raw in sys.stdin:
            text = raw.strip()
            if text == "exit":
                break
            if not text:
                continue
            sent, reply = _send_command(sb, text if text.startswith("/") else f"/c {text}")
            if not sent:
                failures += 1
                print(f"(error) {reply}")
            elif reply.strip():
                print(reply.rstrip("\n"))

        if failures:
            return result("shell", False, "one or more commands failed")
        return result("shell", True)
    finally:
        # Only reap a server this call started.
        if owned_server is not None:
            _terminate_server(owned_server)
def build_parser() -> argparse.ArgumentParser:
    """Build the ``verify.py`` argument parser with one subparser per subcommand.

    Subcommands are registered in this order: ``static``, ``load``,
    ``behavior``, ``all``, ``debug``, ``shell``, ``doctor``. Each stores its
    handler in the ``func`` default, which is what the dispatcher calls.

    Returns:
        The configured top-level parser; a subcommand is required.
    """
    parser = argparse.ArgumentParser(
        prog="verify.py",
        description="Local agent-driven verification pipeline for creative-mod.",
    )
    # NOTE(review): dest="command" shares its attribute name with debug's
    # --command and shell's positional `command` - confirm nothing relies on
    # args.command holding the subcommand name.
    sub = parser.add_subparsers(dest="command", required=True, metavar="subcommand")

    def add_create_args(p: argparse.ArgumentParser) -> None:
        # Options that govern the --create stage.
        p.add_argument(
            "--clean",
            action="store_true",
            help="recreate the debug save from scratch (default reuses for a fast loop)",
        )
        p.add_argument(
            "--timeout",
            type=float,
            default=180.0,
            help="hard timeout (seconds) for the --create stage (default: 180)",
        )

    def add_run_args(p: argparse.ArgumentParser) -> None:
        # --create options plus the RCON readiness timeout.
        add_create_args(p)
        p.add_argument(
            "--ready-timeout",
            type=float,
            default=120.0,
            help="hard timeout (seconds) to wait for the server to answer RCON (default: 120)",
        )

    static_parser = sub.add_parser("static", help="luacheck . + stylua --check .")
    static_parser.set_defaults(func=cmd_static)

    load_parser = sub.add_parser("load", help="data + control load gate")
    add_create_args(load_parser)
    load_parser.set_defaults(func=cmd_load)

    for sub_name, summary, handler in (
        ("behavior", "headless server + RCON assertion batch", cmd_behavior),
        ("all", "static -> load -> behavior in sequence", cmd_all),
    ):
        run_parser = sub.add_parser(sub_name, help=summary)
        add_run_args(run_parser)
        run_parser.set_defaults(func=handler)

    debug_parser = sub.add_parser("debug", help="bounded scriptable headless debug session")
    add_run_args(debug_parser)
    debug_parser.add_argument(
        "--command",
        default=None,
        help="one-shot RCON command to run once the server is ready (e.g. '/c rcon.print(game.tick)')",
    )
    debug_parser.add_argument(
        "--gui",
        action="store_true",
        help="manual-only escape hatch: launch the full graphical client against the debug save (blocks; needs a display)",
    )
    debug_parser.set_defaults(func=cmd_debug)

    shell_parser = sub.add_parser("shell", help="bounded RCON pass-through (one-shot or stdin REPL)")
    add_run_args(shell_parser)
    shell_parser.add_argument(
        "command",
        nargs="?",
        default=None,
        help="one-shot command to send; omit to read commands from stdin (auto-prefixing /c)",
    )
    shell_parser.set_defaults(func=cmd_shell)

    doctor_parser = sub.add_parser("doctor", help="preflight: factorio binary/version, uv, jq")
    doctor_parser.set_defaults(func=cmd_doctor)

    return parser