From 80e449ae63d74af9c8026962d187c7c1fff476ff Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 17:10:56 +0000 Subject: [PATCH 01/27] Add monitor manifest module for queue rehydration Adds cmd_queue.monitor_manifest, which lets a queue's run state be serialized to disk and reloaded by an out-of-process monitor. Each queue subclass now has _build_monitor_manifest, _write_monitor_manifest, and _from_manifest hooks so monitor() and kill() can be invoked on a queue rebuilt from the manifest alone (no jobs resubmitted). This is groundwork for letting the monitor live in its own tmux session that survives the parent shell. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/monitor_manifest.py | 132 ++++++++++++++++++++++++++++++++++ cmd_queue/slurm_queue.py | 49 +++++++++++++ cmd_queue/tmux_queue.py | 65 +++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 cmd_queue/monitor_manifest.py diff --git a/cmd_queue/monitor_manifest.py b/cmd_queue/monitor_manifest.py new file mode 100644 index 0000000..ebc004f --- /dev/null +++ b/cmd_queue/monitor_manifest.py @@ -0,0 +1,132 @@ +from __future__ import annotations +# mypy: ignore-errors + +""" +Persistent metadata describing a queue at run-time so that a monitor process +can reattach to it without holding a live queue object. + +A monitor manifest is a small JSON file written by ``Queue.run()`` (or its +subclass overrides). It captures everything the monitor needs to: + + * read worker state files (tmux backend) or job ids (slurm backend) + * clean up the queue (kill tmux sessions, scancel slurm jobs) + +The :func:`load_queue_for_monitoring` factory rebuilds a queue object +that is sufficient for ``monitor()`` and ``kill()`` to work, without +re-submitting jobs or re-running the workload. + +An "active queue" index in ``~/.cache/cmd_queue/active/<name>.json`` maps +a human queue name to the most recent manifest path so that +``cmd_queue monitor <name>`` can find it. 
+""" +import json +from typing import Any, Dict, Optional + +import ubelt as ub + + +SCHEMA_VERSION = 1 + + +def manifest_path_for_dpath(dpath: Any) -> ub.Path: + """Canonical location of the manifest file inside a queue's dpath.""" + return ub.Path(dpath) / 'monitor_manifest.json' + + +def _active_index_dpath() -> ub.Path: + return ub.Path.appdir('cmd_queue/active').ensuredir() + + +def active_index_path(name: str) -> ub.Path: + """Path to the active-queue index entry for the given queue name.""" + return _active_index_dpath() / f'{name}.json' + + +def write_manifest(manifest: Dict[str, Any], path: Any) -> ub.Path: + """Atomically write a manifest dict to ``path``.""" + path = ub.Path(path) + path.parent.ensuredir() + payload = dict(manifest) + payload.setdefault('schema_version', SCHEMA_VERSION) + payload['manifest_path'] = str(path) + tmp = path.with_suffix(path.suffix + '.tmp') + tmp.write_text(json.dumps(payload, indent=2, sort_keys=True)) + tmp.replace(path) + return path + + +def read_manifest(path: Any) -> Dict[str, Any]: + return json.loads(ub.Path(path).read_text()) + + +def update_active_index(name: str, manifest_path: Any) -> Optional[ub.Path]: + """Record ``name -> manifest_path`` so ``cmd_queue monitor `` works. + + Returns the active index entry path on success, ``None`` if no name was + provided (e.g. the queue was unnamed). + """ + if not name: + return None + entry = active_index_path(name) + payload = { + 'name': name, + 'manifest_path': str(manifest_path), + 'updated_at': ub.timestamp(), + } + entry.parent.ensuredir() + tmp = entry.with_suffix(entry.suffix + '.tmp') + tmp.write_text(json.dumps(payload, indent=2, sort_keys=True)) + tmp.replace(entry) + return entry + + +def resolve_manifest(name_or_path: str) -> ub.Path: + """Resolve a name or path argument to an absolute manifest path. 
+ + Accepts: + * an absolute or relative path to a manifest file + * a path to a queue dpath (containing ``monitor_manifest.json``) + * a queue name registered in the active-queue index + """ + candidate = ub.Path(name_or_path).expand() + if candidate.is_file(): + return candidate.absolute() + if candidate.is_dir(): + nested = manifest_path_for_dpath(candidate) + if nested.exists(): + return nested.absolute() + entry = active_index_path(name_or_path) + if entry.exists(): + info = json.loads(entry.read_text()) + path = ub.Path(info['manifest_path']) + if path.exists(): + return path.absolute() + raise FileNotFoundError( + f'Active-index entry for {name_or_path!r} points to ' + f'{path}, which no longer exists.' + ) + raise FileNotFoundError( + f'Could not resolve {name_or_path!r} to a queue manifest. ' + f'Tried as path, dpath, and active-index name.' + ) + + +def load_queue_for_monitoring(manifest_path: Any) -> Any: + """Construct a queue object from a manifest, suitable for monitor/kill. + + The returned queue has no submitted jobs. Its ``monitor()`` and + ``kill()`` methods operate on the persisted state files / job ids that + the original ``run()`` invocation produced. 
+ """ + manifest = read_manifest(manifest_path) + backend = manifest['backend'] + if backend == 'tmux': + from cmd_queue import tmux_queue + return tmux_queue.TMUXMultiQueue._from_manifest(manifest) + elif backend == 'slurm': + from cmd_queue import slurm_queue + return slurm_queue.SlurmQueue._from_manifest(manifest) + else: + raise NotImplementedError( + f'Monitor reattach is not implemented for backend {backend!r}' + ) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 61a40d7..6c63f01 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -910,6 +910,55 @@ def read_state(self) -> Dict[str, Any]: # this return {} + def _build_monitor_manifest(self) -> Dict[str, Any]: + """Snapshot enough state for an out-of-process monitor to reattach.""" + return { + 'backend': 'slurm', + 'name': self.name if hasattr(self, 'name') else self.queue_id, + 'queue_id': self.queue_id, + 'dpath': str(self.dpath), + 'fpath': str(self.fpath), + 'jobid_fpath': str(self.jobid_fpath) if self.jobid_fpath else None, + 'job_names': [job.name for job in self.jobs], + } + + def _write_monitor_manifest(self) -> Any: + """Persist the monitor manifest to ``/monitor_manifest.json``.""" + from cmd_queue import monitor_manifest as mm + path = mm.manifest_path_for_dpath(self.dpath) + manifest = self._build_monitor_manifest() + mm.write_manifest(manifest, path) + # Use queue_id as the active-index name; SlurmQueue does not require a + # human name, so this lets `cmd_queue monitor ` work too. 
+ mm.update_active_index(manifest['name'], path) + return path + + @classmethod + def _from_manifest(cls, manifest: Dict[str, Any]) -> "SlurmQueue": + """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" + self = cls.__new__(cls) + base_queue.Queue.__init__(self) + self.queue_id = manifest['queue_id'] + self.name = manifest.get('name', self.queue_id) + self.dpath = ub.Path(manifest['dpath']) + self.fpath = ub.Path(manifest['fpath']) + self.log_dpath = self.dpath / 'logs' + self.shell = None + self.preamble = [] + self.all_depends = None + self._sbatch_kvargs = ub.udict() + self._sbatch_flags = ub.udict() + self._include_monitor_metadata = False + jobid_fpath = manifest.get('jobid_fpath') + self.jobid_fpath = ub.Path(jobid_fpath) if jobid_fpath else None + self.unused_kwargs = {} + # The reconstructed jobs only need a name for kill() (scancel --name). + self.jobs = [ + SlurmJob(command='', name=name) + for name in manifest.get('job_names', []) + ] + return self + def print_commands(self, *args: Any, **kwargs: Any) -> None: r""" Print info about the commands, optionally with rich diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index dc83d90..d70b1a1 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -989,6 +989,71 @@ def _tmux_current_sessions(self): sessions = tmux.list_sessions() return sessions + def _build_monitor_manifest(self) -> Dict[str, Any]: + """Snapshot enough state for an out-of-process monitor to reattach.""" + workers_info = [] + for worker in self.workers: + workers_info.append({ + 'name': worker.name, + 'rootid': worker.rootid, + 'dpath': str(worker.dpath), + 'pathid': worker.pathid, + 'state_fpath': str(worker.state_fpath), + 'fpath': str(worker.fpath), + 'environ': dict(worker.environ or {}), + }) + return { + 'backend': 'tmux', + 'name': self.name, + 'rootid': self.rootid, + 'pathid': self.pathid, + 'dpath': str(self.dpath), + 'fpath': str(self.fpath), + 'size': self.size, + 'gpus': self.gpus, + 
'tmux_session_prefix': self._tmux_session_prefix, + 'workers': workers_info, + } + + def _write_monitor_manifest(self) -> Any: + """Persist the monitor manifest to ``/monitor_manifest.json``.""" + from cmd_queue import monitor_manifest as mm + path = mm.manifest_path_for_dpath(self.dpath) + manifest = self._build_monitor_manifest() + mm.write_manifest(manifest, path) + mm.update_active_index(self.name, path) + return path + + @classmethod + def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": + """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" + self = cls.__new__(cls) + # Initialize the base Queue state without re-creating workers / dpaths. + base_queue.Queue.__init__(self) + self.name = manifest['name'] + self.rootid = manifest['rootid'] + self.pathid = manifest.get('pathid', '{}_{}'.format(self.name, self.rootid)) + self.dpath = ub.Path(manifest['dpath']) + self.fpath = ub.Path(manifest['fpath']) + self.size = manifest['size'] + self.gpus = manifest.get('gpus') + self.environ = {} + self.cmd_verbose = 2 + self._tmux_session_prefix = manifest.get('tmux_session_prefix', 'cmdq_') + self.job_info_dpath = self.dpath / 'job_info' + self.preamble = [] + self.jobs = [] + self.workers = [ + serial_queue.SerialQueue( + name=w['name'], + rootid=w['rootid'], + dpath=ub.Path(w['dpath']), + environ=w.get('environ') or {}, + ) + for w in manifest.get('workers', []) + ] + return self + def has_stdin() -> bool: import sys From bfd6100b805a77632c15c0d984bea4dba66d9d08 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 17:12:14 +0000 Subject: [PATCH 02/27] Write monitor manifest from run() for tmux and slurm queues Each backend now persists its monitor manifest at the start of run(), which makes the queue reattachable from a separate process. 
Also preserves the user-supplied SlurmQueue name on self.name (previously dropped after queue_id was constructed) so that name-based monitor lookup works for both queue_id and the friendly name. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/slurm_queue.py | 12 ++++++++---- cmd_queue/tmux_queue.py | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 6c63f01..d659ffc 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -463,6 +463,7 @@ def __init__( self.jobs = [] if name is None: name = 'SQ' + self.name = name stamp = time.strftime('%Y%m%dT%H%M%S') self.unused_kwargs = kwargs self.queue_id = name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] @@ -697,6 +698,7 @@ def run(self, block: bool = True, system: bool = False, **kw: Any) -> Optional[A raise Exception('slurm backend is not available') self.log_dpath.ensuredir() self.write() + self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=3, check=True, system=system) if block: return self.monitor() @@ -914,7 +916,7 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: """Snapshot enough state for an out-of-process monitor to reattach.""" return { 'backend': 'slurm', - 'name': self.name if hasattr(self, 'name') else self.queue_id, + 'name': self.name or self.queue_id, 'queue_id': self.queue_id, 'dpath': str(self.dpath), 'fpath': str(self.fpath), @@ -928,9 +930,11 @@ def _write_monitor_manifest(self) -> Any: path = mm.manifest_path_for_dpath(self.dpath) manifest = self._build_monitor_manifest() mm.write_manifest(manifest, path) - # Use queue_id as the active-index name; SlurmQueue does not require a - # human name, so this lets `cmd_queue monitor ` work too. - mm.update_active_index(manifest['name'], path) + # Register under both queue_id (always unique) and the user-supplied + # name (when distinct) so `cmd_queue monitor ` finds it. 
+ mm.update_active_index(self.queue_id, path) + if self.name and self.name != self.queue_id: + mm.update_active_index(self.name, path) return path @classmethod diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index d70b1a1..9560bdc 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -688,6 +688,7 @@ def run( self.kill_other_queues(ask_first=True) self.write() + self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=self.cmd_verbose, check=True, system=system) if block: From 858c77696753332e983c62e73f02e6d326912e17 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 17:13:37 +0000 Subject: [PATCH 03/27] Add `cmd_queue monitor <name>` CLI subcommand Reattaches to a running queue by name (via the active-index that run() populates), by manifest path (--manifest), or by dpath. This is the entry point that step 3's tmux monitor backend will execute inside its own tmux session, and is also useful on its own when the original run() shell has been closed but workers are still active. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/main.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/cmd_queue/main.py b/cmd_queue/main.py index 555eaf4..960bcbc 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -286,6 +286,75 @@ def run(config) -> None: queue = config._build_queue() queue.run() + class monitor(CommonConfig): + """ + Monitor an already-running queue. + + Locates the queue by name (via the active-queue index that ``run`` + populates), by manifest path, or by the queue's working directory. + Useful for reattaching to a queue whose ``run()`` invocation has + ended (e.g. shell closed) while workers are still active, and as + the entry point used by the tmux monitor backend to host the + status UI in its own session. + """ + __command__ = 'monitor' + + manifest = scfg.Value(None, help=ub.paragraph( + ''' + Optional explicit path to the monitor manifest JSON. 
If + given, this overrides positional name resolution. + ''')) + + onfail = scfg.Value('', choices=['', 'kill'], help=ub.paragraph( + ''' + What to do if the queue ends with at least one failure. + ``kill`` cancels still-running workers; ``''`` leaves them. + ''')) + + onexit = scfg.Value('', choices=['', 'capture'], help=ub.paragraph( + ''' + What to do once the queue is fully done. ``capture`` runs the + backend's capture step (e.g. dump tmux pane contents). + ''')) + + refresh_rate = scfg.Value(0.4, help='monitor refresh rate, seconds') + + with_textual = scfg.Value('auto', help='use textual UI if available (tmux backend only)') + + def run(config) -> None: + from cmd_queue import monitor_manifest as mm + if config.manifest: + manifest_path = ub.Path(config.manifest).expand().absolute() + if not manifest_path.exists(): + raise FileNotFoundError(manifest_path) + else: + target = config['qname'] + if not target: + raise SystemExit( + 'cmd_queue monitor requires either a queue name ' + '(positional) or --manifest=' + ) + manifest_path = mm.resolve_manifest(target) + if config.verbose: + rich.print(f'Loading monitor manifest from [bold]{manifest_path}[/bold]') + queue = mm.load_queue_for_monitoring(manifest_path) + kwargs = {} + try: + kwargs['refresh_rate'] = config.refresh_rate + except Exception: + pass + if 'with_textual' in queue.monitor.__code__.co_varnames: + kwargs['with_textual'] = config.with_textual + agg_state = queue.monitor(**kwargs) + agg_state = agg_state or {} + if config.onexit == 'capture' and hasattr(queue, 'capture'): + queue.capture() + # The existing TMUXMultiQueue.run semantics: if everything passed + # and onfail='kill', clean up the now-idle tmux sessions. If + # anything failed, leave them alive so the user can investigate. 
+ if config.onfail == 'kill' and not agg_state.get('failed'): + queue.kill() + class show(CommonShowRun): """ display a queue From 02de1171f7230938b1a01228773a9b98d97bb469 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 17:15:35 +0000 Subject: [PATCH 04/27] Move post-run cleanup ownership from run() into monitor() Both TMUXMultiQueue.monitor and SlurmQueue.monitor now accept onfail/onexit kwargs and perform the corresponding kill()/capture() themselves. run() simply forwards the args. This way the same finalization happens whether the monitor runs inline, in a separate tmux session (step 3), or via `cmd_queue monitor` from another shell. The semantics are preserved (onfail='kill' tears down idle tmux sessions only on a clean exit; on slurm it fires only on failure). Co-Authored-By: Claude Opus 4.7 --- cmd_queue/main.py | 17 +++++++------- cmd_queue/slurm_queue.py | 50 ++++++++++++++++++++++++++++++++++++---- cmd_queue/tmux_queue.py | 39 +++++++++++++++++++++++-------- 3 files changed, 84 insertions(+), 22 deletions(-) diff --git a/cmd_queue/main.py b/cmd_queue/main.py index 960bcbc..02ba7d8 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -345,15 +345,14 @@ def run(config) -> None: pass if 'with_textual' in queue.monitor.__code__.co_varnames: kwargs['with_textual'] = config.with_textual - agg_state = queue.monitor(**kwargs) - agg_state = agg_state or {} - if config.onexit == 'capture' and hasattr(queue, 'capture'): - queue.capture() - # The existing TMUXMultiQueue.run semantics: if everything passed - # and onfail='kill', clean up the now-idle tmux sessions. If - # anything failed, leave them alive so the user can investigate. - if config.onfail == 'kill' and not agg_state.get('failed'): - queue.kill() + # monitor() owns post-run cleanup; only forward the kwargs the + # backend's monitor signature actually accepts. 
+ varnames = queue.monitor.__code__.co_varnames + if 'onfail' in varnames: + kwargs['onfail'] = config.onfail + if 'onexit' in varnames: + kwargs['onexit'] = config.onexit + queue.monitor(**kwargs) class show(CommonShowRun): """ diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index d659ffc..1d9f209 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -693,7 +693,14 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st text = '\n'.join(commands) return text - def run(self, block: bool = True, system: bool = False, **kw: Any) -> Optional[Any]: + def run( + self, + block: bool = True, + system: bool = False, + onfail: str = '', + onexit: str = '', + **kw: Any, + ) -> Optional[Any]: if not self.is_available(): raise Exception('slurm backend is not available') self.log_dpath.ensuredir() @@ -701,11 +708,28 @@ def run(self, block: bool = True, system: bool = False, **kw: Any) -> Optional[A self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=3, check=True, system=system) if block: - return self.monitor() + return self.monitor(onfail=onfail, onexit=onexit) - def monitor(self, refresh_rate: float = 0.4) -> Optional[Any]: + def monitor( + self, + refresh_rate: float = 0.4, + onfail: str = '', + onexit: str = '', + ) -> Optional[Any]: """ - Monitor progress until the jobs are done + Monitor progress until the jobs are done. + + Owns post-run cleanup so that whether the monitor runs inline or + in a separate process (tmux monitor backend, ``cmd_queue + monitor`` CLI), the same finalization happens. + + Args: + onfail (str): if ``'kill'``, scancel the queue's jobs after + the monitor exits when there are failures. Slurm has no + tmux-style sessions to clean up on success, so this only + fires on failure. + onexit (str): currently unused for slurm (kept for API + parity with the tmux backend). 
CommandLine: xdoctest -m cmd_queue.slurm_queue SlurmQueue.monitor --dev --run @@ -886,6 +910,16 @@ def update_status_table(): return table, finished + agg_state: Dict[str, Any] = {} + + def _update_agg_state() -> None: + if job_status_table is None: + return + counts = ub.dict_hist([row['status'] for row in job_status_table]) + for key in ('passed', 'failed', 'skipped'): + agg_state[key] = counts.get(key, 0) + agg_state['total'] = len(job_status_table) + try: table, finished = update_status_table() refresh_rate = 0.4 @@ -894,11 +928,19 @@ def update_status_table(): time.sleep(refresh_rate) table, finished = update_status_table() live.update(table) + _update_agg_state() except KeyboardInterrupt: from rich.prompt import Confirm flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() + return agg_state + + # Slurm has no idle sessions to clean up on success, so onfail='kill' + # only fires when there are observed failures. + if onfail == 'kill' and agg_state.get('failed'): + self.kill() + return agg_state def kill(self) -> None: cancel_commands = [] diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 9560bdc..5a96adb 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -692,13 +692,11 @@ def run( ub.cmd(f'bash {self.fpath}', verbose=self.cmd_verbose, check=True, system=system) if block: - agg_state = self.monitor(with_textual=with_textual) - if onexit == 'capture': - self.capture() - if not agg_state['failed']: - if onfail == 'kill': - self.kill() - return agg_state + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + ) def read_state(self) -> Any: agg_state = {} @@ -735,9 +733,28 @@ def serial_run(self) -> None: for fpath in queue_fpaths: ub.cmd(f'{fpath}', verbose=self.cmd_verbose, check=True) - def monitor(self, refresh_rate: float = 0.4, with_textual: str = 'auto') -> None: + def monitor( + self, + refresh_rate: float = 0.4, + with_textual: str = 'auto', + onfail: str = '', + onexit: 
str = '', + ) -> None: """ - Monitor progress until the jobs are done + Monitor progress until the jobs are done. + + Owns post-run cleanup so that whether the monitor runs inline or + in a separate process (tmux monitor backend, ``cmd_queue + monitor`` CLI), the same finalization happens. + + Args: + onfail (str): if ``'kill'`` and the queue ends with no + failures, kill the now-idle tmux sessions. (The arg is + named for historical reasons; the original behavior was + "tear down on a clean exit, leave alive on failure so + the user can investigate.") + onexit (str): if ``'capture'``, dump tmux pane contents + after the queue finishes. CommandLine: xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:0 @@ -794,6 +811,10 @@ def monitor(self, refresh_rate: float = 0.4, with_textual: str = 'auto') -> None else: self._simple_rich_monitor(refresh_rate) table, finished, agg_state = self._build_status_table() + if onexit == 'capture': + self.capture() + if onfail == 'kill' and not agg_state.get('failed'): + self.kill() return agg_state def _textual_monitor(self): From d2f5352d44f055acdccfdd976468371f6bc55aa1 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 17:19:30 +0000 Subject: [PATCH 05/27] Add monitor='inline'|'tmux'|'none' kwarg to run() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new ``monitor`` kwarg on TMUXMultiQueue.run and SlurmQueue.run controls where the live status UI runs while the queue is executing. Default is ``'inline'`` (current behavior). With ``'tmux'``, the monitor is spawned in a detached tmux session via the new ``util_tmux.tmux.spawn_monitor_session`` helper, which invokes ``cmd_queue monitor --manifest=`` under sys.executable. The parent process still blocks on a headless state poll, so block=True keeps its meaning even when the visible UI lives elsewhere — closing or detaching the tmux UI does not return control early. 
The tmux monitor session intentionally outlives the workers: workers self-clean on success, so the monitor session is what holds the final status table open for the user to read. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/slurm_queue.py | 74 +++++++++++++++++++++++++++++- cmd_queue/tmux_queue.py | 90 ++++++++++++++++++++++++++++++++++++- cmd_queue/util/util_tmux.py | 76 ++++++++++++++++++++++++++++++- 3 files changed, 235 insertions(+), 5 deletions(-) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 1d9f209..3aae1ad 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -699,16 +699,86 @@ def run( system: bool = False, onfail: str = '', onexit: str = '', + monitor: str = 'inline', **kw: Any, ) -> Optional[Any]: + """ + Execute the queue. + + Args: + monitor (str): where the live status UI runs while + ``block=True``. ``'inline'`` (default) renders in the + current shell. ``'tmux'`` spawns ``cmd_queue monitor`` + in a detached tmux session so the UI survives the + calling shell closing — useful for slurm jobs whose + workers run on the cluster long after the submit shell + might be gone. ``'none'`` skips the UI but still blocks + when ``block=True``. 
+ """ if not self.is_available(): raise Exception('slurm backend is not available') self.log_dpath.ensuredir() self.write() - self._write_monitor_manifest() + manifest_path = self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=3, check=True, system=system) - if block: + if not block: + return None + if monitor == 'inline': return self.monitor(onfail=onfail, onexit=onexit) + if monitor == 'none': + from rich import print as rich_print + rich_print( + '[bold]Queue running detached.[/bold] ' + f'Reattach with: cmd_queue monitor --manifest={manifest_path}' + ) + return None + if monitor == 'tmux': + if not ub.find_exe('tmux'): + import warnings + warnings.warn( + "monitor='tmux' requested but tmux not found; " + "falling back to inline monitor.") + return self.monitor(onfail=onfail, onexit=onexit) + from cmd_queue.tmux_queue import has_stdin + from cmd_queue.util.util_tmux import tmux as _tmux + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + session_name = f'cmdq-monitor-{self.queue_id}' + from rich import print as rich_print + rich_print( + f'[bold]Launching monitor in tmux session[/bold] {session_name}' + ) + _tmux.spawn_monitor_session( + session_name=session_name, + manifest_path=manifest_path, + attach=has_stdin(), + verbose=0, + extra_args=extra_args, + ) + return self._headless_block_until_done() + raise ValueError( + f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" + ) + + def _headless_block_until_done(self, refresh_rate: float = 5.0) -> None: + """Poll squeue until none of this queue's job names are still queued.""" + import time + job_names = {job.name for job in self.jobs} + if not job_names: + return None + while True: + info = ub.cmd('squeue --format="%j"') + still_queued = { + line.strip() + for line in info['out'].splitlines() + if line.strip() in job_names + } + if not still_queued: + return None + time.sleep(refresh_rate) def monitor( 
self, diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 5a96adb..2395288 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -658,6 +658,7 @@ def run( with_textual: str = 'auto', check_other_sessions: Optional[bool] = None, other_session_handler: str = 'auto', + monitor: str = 'inline', **kw: Any, ) -> None: """ @@ -669,6 +670,19 @@ def run( with the same queue name. Can be 'kill', 'ask', or 'ignore', or 'auto' - which defaults to 'ask' if stdin is available and 'kill' if it is not. + + monitor (str): + Where the live status UI runs while ``block=True``. + + * ``'inline'`` (default): renders in the current shell, just + like today. Closing the shell loses the view. + * ``'tmux'``: spawns ``cmd_queue monitor --manifest=...`` + in a detached tmux session and (when interactive) attaches + the user to it. The current process still blocks until + jobs finish (and runs the post-run cleanup), so detaching + the tmux UI does not return control to the caller. + * ``'none'``: no UI; the call still blocks via a headless + state-file poll when ``block=True``. 
""" if not self.is_available(): @@ -688,15 +702,87 @@ def run( self.kill_other_queues(ask_first=True) self.write() - self._write_monitor_manifest() + manifest_path = self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=self.cmd_verbose, check=True, system=system) - if block: + if not block: + return None + return self._dispatch_monitor( + monitor=monitor, + manifest_path=manifest_path, + onfail=onfail, + onexit=onexit, + with_textual=with_textual, + ) + + def _dispatch_monitor( + self, + monitor: str, + manifest_path: Any, + onfail: str, + onexit: str, + with_textual: str = 'auto', + ) -> Any: + if monitor == 'inline': return self.monitor( with_textual=with_textual, onfail=onfail, onexit=onexit, ) + if monitor == 'none': + from rich import print as rich_print + rich_print( + '[bold]Queue running detached.[/bold] ' + f'Reattach with: cmd_queue monitor --manifest={manifest_path}' + ) + return self._headless_block_until_done() + if monitor == 'tmux': + if not ub.find_exe('tmux'): + import warnings + warnings.warn( + "monitor='tmux' requested but tmux not found; " + "falling back to inline monitor.") + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + ) + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + session_name = f'cmdq-monitor-{self.pathid}' + from rich import print as rich_print + rich_print( + f'[bold]Launching monitor in tmux session[/bold] {session_name}' + ) + tmux.spawn_monitor_session( + session_name=session_name, + manifest_path=manifest_path, + attach=has_stdin(), + verbose=0, + extra_args=extra_args, + ) + # The tmux session now owns the UI and the post-run cleanup. + # The caller still blocks here so block=True keeps its meaning. 
+ return self._headless_block_until_done() + raise ValueError( + f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" + ) + + def _headless_block_until_done(self, refresh_rate: float = 1.0) -> Any: + """Poll the per-worker state files until all workers are finished. + + Used as the parent-side block-wait when the visible monitor is + running elsewhere (in a tmux session, or not at all). + """ + import time + while True: + table, finished, agg_state = self._build_status_table() + if finished: + return agg_state + time.sleep(refresh_rate) def read_state(self) -> Any: agg_state = {} diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index 55a50fb..77c4afa 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -3,7 +3,7 @@ """ Generic tmux helpers """ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import ubelt as ub @@ -55,6 +55,80 @@ def kill_session(target_session: str, verbose: int = 3) -> Any: def kill_pane(pane_id: str, verbose: int = 3) -> Any: return ub.cmd(f'tmux kill-pane -t {pane_id}', verbose=verbose) + @staticmethod + def is_inside() -> bool: + """True if the current process is running inside a tmux session.""" + import os + return bool(os.environ.get('TMUX')) + + @staticmethod + def has_session(target_session: str) -> bool: + info = ub.cmd(['tmux', 'has-session', '-t', target_session]) + return info['ret'] == 0 + + @staticmethod + def spawn_monitor_session( + session_name: str, + manifest_path: Any, + attach: bool = True, + verbose: int = 0, + extra_args: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Start ``cmd_queue monitor --manifest=`` in a detached tmux + session and (optionally) attach the user to it. + + Returns a dict describing what was created and how to reattach. 
+ """ + import os + import shlex + import sys + if not ub.find_exe('tmux'): + raise RuntimeError('tmux is not available') + + # Always invoke the same Python interpreter that started run() — a + # globally-installed older ``cmd_queue`` binary on PATH would not + # know about the monitor subcommand. + cmd_parts = [ + sys.executable, '-m', 'cmd_queue', 'monitor', + '--manifest=' + str(manifest_path), + ] + if extra_args: + cmd_parts.extend(extra_args) + # Wrap in a small shell script so the pane stays open after the + # monitor exits, letting the user see the final table. + inner = ' '.join(shlex.quote(p) for p in cmd_parts) + bash_payload = ( + f'{inner}; ' + 'echo; echo "[cmd_queue monitor exited] press enter to close"; ' + 'read -r _' + ) + new_session_cmd = [ + 'tmux', 'new-session', '-d', '-s', session_name, + 'bash', '-lc', bash_payload, + ] + ub.cmd(new_session_cmd, verbose=verbose, check=True) + + info: Dict[str, Any] = { + 'session_name': session_name, + 'attach_command': f'tmux attach -t {session_name}', + } + if attach: + inside = bool(os.environ.get('TMUX')) + if inside: + # Switching the current client is the in-tmux equivalent of + # attach; spawning a nested attach is rejected by tmux. + ub.cmd(['tmux', 'switch-client', '-t', session_name], + verbose=verbose, check=True) + info['attached_via'] = 'switch-client' + else: + # ``attach-session`` is interactive, so let the foreground + # process inherit the tty. + ub.cmd(['tmux', 'attach-session', '-t', session_name], + verbose=verbose, check=False) + info['attached_via'] = 'attach-session' + return info + @staticmethod def list_panes(target_session: str) -> List[Dict[str, str]]: """ From 6022168718fa6c330a90ab16977d2dd9bb8c4703 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 18:21:14 +0000 Subject: [PATCH 06/27] Add examples/tmux_example.py demonstrating monitor= modes Single-file example covering monitor='inline', 'tmux', and 'none'. 
Useful as both a hands-on demo for users and a smoke test that the new monitor backend works against a small real DAG. Co-Authored-By: Claude Opus 4.7 --- examples/tmux_example.py | 102 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 examples/tmux_example.py diff --git a/examples/tmux_example.py b/examples/tmux_example.py new file mode 100644 index 0000000..d05f99d --- /dev/null +++ b/examples/tmux_example.py @@ -0,0 +1,102 @@ +""" +Demonstrates the ``monitor`` kwarg on the tmux backend. + +Three modes are illustrated: + + * ``monitor='inline'`` (default) — the live status table renders in + the current shell, just like before. Closing the shell loses the + view and (depending on your terminal) may kill the parent process. + + * ``monitor='tmux'`` — the status table renders in a *separate* + detached tmux session. The original shell still blocks until jobs + finish, but the visible UI (and the post-run cleanup) lives in a + session that survives the shell closing. Run with ``--mode=tmux``. + + * ``monitor='none'`` — no live UI; ``run()`` headless-blocks until + jobs finish. Useful in non-interactive scripts. The reattach hint + is still printed so a human can attach via ``cmd_queue monitor``. 
+ +CommandLine: + # Default: inline monitor (current shell) + python ~/code/cmd_queue/examples/tmux_example.py + + # Spawn the monitor in its own tmux session and attach + python ~/code/cmd_queue/examples/tmux_example.py --mode=tmux + + # Run silently and reattach manually with `cmd_queue monitor ` + python ~/code/cmd_queue/examples/tmux_example.py --mode=none +""" +import argparse + + +def main(): + import cmd_queue + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--mode', + choices=['inline', 'tmux', 'none'], + default='inline', + help='Where the monitor UI runs.', + ) + parser.add_argument( + '--name', + default='tmux-example', + help='Queue name; also doubles as the lookup key for ' + '`cmd_queue monitor `.', + ) + parser.add_argument( + '--workers', type=int, default=2, + help='Number of parallel tmux workers.', + ) + args = parser.parse_args() + + queue = cmd_queue.Queue.create( + backend='tmux', + size=args.workers, + name=args.name, + ) + + # Build a small DAG so the status table has something interesting to show. + job_a = queue.submit('echo "a starting"; sleep 2; echo "a done"', name='a') + job_b = queue.submit('echo "b starting"; sleep 3; echo "b done"', name='b') + queue.submit( + 'echo "c (depends on a, b)"; sleep 1; echo "c done"', + name='c', + depends=[job_a, job_b], + ) + + queue.print_graph() + + if not queue.is_available(): + raise SystemExit('tmux backend not available on this machine') + + print(f'\nLaunching with monitor={args.mode!r}\n') + + # The interesting line. Identical for any monitor mode — only the + # location of the UI changes. + result = queue.run( + block=True, + monitor=args.mode, + onfail='kill', # tear down idle worker sessions on success + other_session_handler='kill', + ) + + print(f'\nrun() returned: {result}') + if args.mode == 'tmux': + print( + 'The monitor tmux session stayed alive after the workers ' + 'finished so the final status table is visible. 
Reattach ' + 'from any shell with:\n' + f' tmux attach -t cmdq-monitor-{args.name}-...\n' + 'or look it up by queue name with:\n' + f' cmd_queue monitor {args.name}' + ) + + +if __name__ == '__main__': + """ + CommandLine: + python ~/code/cmd_queue/examples/tmux_example.py --mode=tmux + """ + main() From db9eb5f23241601287419adc175e3571385268ae Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 18:34:15 +0000 Subject: [PATCH 07/27] Make tmux monitor opt-in with press-key attach prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parent shell no longer pulls the user's tty into the spawned monitor session. Instead, after spawning the monitor it prints a prompt explaining how to attach (or switch-client when already inside tmux) and a manual reattach hint, then enters a cbreak keypress loop: [a] attach (or switch-client) to the monitor session — user can detach with the usual binding and we re-enter the loop. [q]/[d] stop watching from this shell (queue keeps running). Non-TTY stdin falls back to a silent polling loop, so the path remains usable in scripts and CI. Also drops the synthetic "press enter to close" prompt at the end of the monitor pane in favour of `exec bash`, so the pane stays open without needing user input but doesn't trap the user behind a read. 
Co-Authored-By: Claude Opus 4.7 --- cmd_queue/slurm_queue.py | 40 ++++++++-------- cmd_queue/tmux_queue.py | 18 ++++++-- cmd_queue/util/util_tmux.py | 91 ++++++++++++++++++++++++++++++++++--- examples/tmux_example.py | 2 +- 4 files changed, 120 insertions(+), 31 deletions(-) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 3aae1ad..6dd4094 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -754,32 +754,34 @@ def run( _tmux.spawn_monitor_session( session_name=session_name, manifest_path=manifest_path, - attach=has_stdin(), + attach=False, verbose=0, extra_args=extra_args, ) - return self._headless_block_until_done() + job_names = {job.name for job in self.jobs} + + def _is_finished() -> bool: + if not job_names: + return True + info = ub.cmd('squeue --format="%j"') + still_queued = { + line.strip() + for line in info['out'].splitlines() + if line.strip() in job_names + } + return not still_queued + + _tmux.block_with_attach_prompt( + session_name=session_name, + is_finished_fn=_is_finished, + refresh_rate=5.0, + label=f'queue {self.name or self.queue_id}', + ) + return None raise ValueError( f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" ) - def _headless_block_until_done(self, refresh_rate: float = 5.0) -> None: - """Poll squeue until none of this queue's job names are still queued.""" - import time - job_names = {job.name for job in self.jobs} - if not job_names: - return None - while True: - info = ub.cmd('squeue --format="%j"') - still_queued = { - line.strip() - for line in info['out'].splitlines() - if line.strip() in job_names - } - if not still_queued: - return None - time.sleep(refresh_rate) - def monitor( self, refresh_rate: float = 0.4, diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 2395288..db22ed4 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -760,13 +760,23 @@ def _dispatch_monitor( tmux.spawn_monitor_session( session_name=session_name, 
manifest_path=manifest_path, - attach=has_stdin(), + attach=False, verbose=0, extra_args=extra_args, ) - # The tmux session now owns the UI and the post-run cleanup. - # The caller still blocks here so block=True keeps its meaning. - return self._headless_block_until_done() + # Don't pull the user's terminal into the monitor session; let + # them attach on demand and freely detach back to this shell. + def _is_finished() -> bool: + _, finished, _ = self._build_status_table() + return finished + tmux.block_with_attach_prompt( + session_name=session_name, + is_finished_fn=_is_finished, + refresh_rate=1.0, + label=f'queue {self.name}', + ) + _, _, agg_state = self._build_status_table() + return agg_state raise ValueError( f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" ) diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index 77c4afa..ce2d8e7 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -95,14 +95,11 @@ def spawn_monitor_session( ] if extra_args: cmd_parts.extend(extra_args) - # Wrap in a small shell script so the pane stays open after the - # monitor exits, letting the user see the final table. + # After the monitor exits, drop into an interactive shell so the + # pane stays alive and the user can scroll up to read the final + # status table without a synthetic prompt blocking dismissal. 
inner = ' '.join(shlex.quote(p) for p in cmd_parts) - bash_payload = ( - f'{inner}; ' - 'echo; echo "[cmd_queue monitor exited] press enter to close"; ' - 'read -r _' - ) + bash_payload = f'{inner}; exec bash' new_session_cmd = [ 'tmux', 'new-session', '-d', '-s', session_name, 'bash', '-lc', bash_payload, @@ -129,6 +126,86 @@ def spawn_monitor_session( info['attached_via'] = 'attach-session' return info + @staticmethod + def block_with_attach_prompt( + session_name: str, + is_finished_fn: Any, + refresh_rate: float = 1.0, + label: str = 'queue', + ) -> None: + """ + Block until ``is_finished_fn()`` returns truthy, while letting the + user press ``a`` to attach (or switch) to the given tmux session + and ``q`` / ``d`` to stop watching from the parent shell. + + On a non-TTY stdin (e.g. piped invocation, CI), falls back to a + silent polling loop. + + Args: + session_name: target tmux session for the attach action. + is_finished_fn: zero-arg callable returning True when the + queue is done. + refresh_rate: how often (seconds) to re-check completion and + poll for keypresses. + label: short noun used in the user-facing prompt. + """ + import os + import sys + import time + + if not sys.stdin.isatty(): + while not is_finished_fn(): + time.sleep(refresh_rate) + return + + import select + import termios + import tty + + inside_tmux = bool(os.environ.get('TMUX')) + attach_cmd = ( + f'tmux switch-client -t {session_name}' if inside_tmux + else f'tmux attach -t {session_name}' + ) + print( + f'Watching {label}. Press [a] to attach to monitor session ' + f'({session_name}), [q] to stop watching (queue keeps running).' 
+ ) + print(f'Manual reattach anytime from another shell: {attach_cmd}') + + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + tty.setcbreak(fd) + while True: + if is_finished_fn(): + return + ready, _, _ = select.select([sys.stdin], [], [], refresh_rate) + if not ready: + continue + ch = sys.stdin.read(1) + if ch in ('a', 'A'): + # Restore terminal before tmux takes over the tty. + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + try: + if inside_tmux: + ub.cmd(['tmux', 'switch-client', '-t', + session_name], check=False) + else: + ub.cmd(['tmux', 'attach-session', '-t', + session_name], check=False) + finally: + # Re-enter cbreak when the user detaches back. + tty.setcbreak(fd) + elif ch in ('q', 'Q', 'd', 'D'): + return + elif ch == '\x03': # Ctrl-C + raise KeyboardInterrupt + except KeyboardInterrupt: + return + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + @staticmethod def list_panes(target_session: str) -> List[Dict[str, str]]: """ diff --git a/examples/tmux_example.py b/examples/tmux_example.py index d05f99d..1a323cd 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -36,7 +36,7 @@ def main(): parser.add_argument( '--mode', choices=['inline', 'tmux', 'none'], - default='inline', + default='tmux', help='Where the monitor UI runs.', ) parser.add_argument( From c9cbcaa527de083381013e9532d972d4624e4618 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 18:45:29 +0000 Subject: [PATCH 08/27] Print done summary after headless/tmux monitor completes; expand example DAG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After block_with_attach_prompt and _headless_block_until_done exit, print a Rich-formatted summary line showing pass/fail/skip/total so the user gets a clear completion signal in their original shell. 
The tmux_example DAG is expanded to 11 jobs across 4 dependency levels (prep → proc → merge → final) with 4 workers and 2-8s sleeps, making parallel execution and dependency fan-in clearly visible in the monitor. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/tmux_queue.py | 24 ++++++++++++++++- examples/tmux_example.py | 58 +++++++++++++++++++++++++++++++--------- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index db22ed4..7babb92 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -715,6 +715,25 @@ def run( with_textual=with_textual, ) + @staticmethod + def _print_done_summary(agg_state: Dict[str, Any]) -> None: + from rich import print as rich_print + failed = agg_state.get('failed', 0) + passed = agg_state.get('passed', 0) + skipped = agg_state.get('skipped', 0) + total = agg_state.get('total', 0) + if failed: + status_str = '[bold red]FAILED[/bold red]' + else: + status_str = '[bold green]PASSED[/bold green]' + rich_print( + f'\nQueue complete: {status_str} ' + f'passed=[green]{passed}[/green] ' + f'failed=[red]{failed}[/red] ' + f'skipped=[yellow]{skipped}[/yellow] ' + f'total={total}' + ) + def _dispatch_monitor( self, monitor: str, @@ -735,7 +754,9 @@ def _dispatch_monitor( '[bold]Queue running detached.[/bold] ' f'Reattach with: cmd_queue monitor --manifest={manifest_path}' ) - return self._headless_block_until_done() + agg_state = self._headless_block_until_done() + self._print_done_summary(agg_state) + return agg_state if monitor == 'tmux': if not ub.find_exe('tmux'): import warnings @@ -776,6 +797,7 @@ def _is_finished() -> bool: label=f'queue {self.name}', ) _, _, agg_state = self._build_status_table() + self._print_done_summary(agg_state) return agg_state raise ValueError( f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" diff --git a/examples/tmux_example.py b/examples/tmux_example.py index 1a323cd..38830f9 100644 --- a/examples/tmux_example.py +++ 
b/examples/tmux_example.py @@ -16,6 +16,14 @@ jobs finish. Useful in non-interactive scripts. The reattach hint is still printed so a human can attach via ``cmd_queue monitor``. +The job DAG has four levels and shows meaningful parallel execution: + + Level 1 (prep): prep-A prep-B prep-C prep-D (parallel, 5-8s) + Level 2 (process): proc-A proc-B proc-C proc-D (each after one prep, 3-5s) + Level 3 (merge): merge-X (after proc-A + proc-B) + merge-Y (after proc-C + proc-D) (parallel, 3-4s) + Level 4 (finalize): final (after both merges, 2s) + CommandLine: # Default: inline monitor (current shell) python ~/code/cmd_queue/examples/tmux_example.py @@ -46,7 +54,7 @@ def main(): '`cmd_queue monitor `.', ) parser.add_argument( - '--workers', type=int, default=2, + '--workers', type=int, default=4, help='Number of parallel tmux workers.', ) args = parser.parse_args() @@ -57,28 +65,54 @@ def main(): name=args.name, ) - # Build a small DAG so the status table has something interesting to show. - job_a = queue.submit('echo "a starting"; sleep 2; echo "a done"', name='a') - job_b = queue.submit('echo "b starting"; sleep 3; echo "b done"', name='b') + # Level 1: four independent prep jobs — run fully in parallel. + prep_a = queue.submit( + 'echo "[prep-A] start"; sleep 5; echo "[prep-A] done"', name='prep-A') + prep_b = queue.submit( + 'echo "[prep-B] start"; sleep 7; echo "[prep-B] done"', name='prep-B') + prep_c = queue.submit( + 'echo "[prep-C] start"; sleep 6; echo "[prep-C] done"', name='prep-C') + prep_d = queue.submit( + 'echo "[prep-D] start"; sleep 8; echo "[prep-D] done"', name='prep-D') + + # Level 2: each process job depends on exactly one prep job. 
+ proc_a = queue.submit( + 'echo "[proc-A] start"; sleep 3; echo "[proc-A] done"', + name='proc-A', depends=[prep_a]) + proc_b = queue.submit( + 'echo "[proc-B] start"; sleep 4; echo "[proc-B] done"', + name='proc-B', depends=[prep_b]) + proc_c = queue.submit( + 'echo "[proc-C] start"; sleep 5; echo "[proc-C] done"', + name='proc-C', depends=[prep_c]) + proc_d = queue.submit( + 'echo "[proc-D] start"; sleep 3; echo "[proc-D] done"', + name='proc-D', depends=[prep_d]) + + # Level 3: two merge jobs, each waiting on a pair of proc jobs. + merge_x = queue.submit( + 'echo "[merge-X] start"; sleep 4; echo "[merge-X] done"', + name='merge-X', depends=[proc_a, proc_b]) + merge_y = queue.submit( + 'echo "[merge-Y] start"; sleep 3; echo "[merge-Y] done"', + name='merge-Y', depends=[proc_c, proc_d]) + + # Level 4: single finalize job — the whole pipeline converges here. queue.submit( - 'echo "c (depends on a, b)"; sleep 1; echo "c done"', - name='c', - depends=[job_a, job_b], - ) + 'echo "[final] start"; sleep 2; echo "[final] done"', + name='final', depends=[merge_x, merge_y]) queue.print_graph() if not queue.is_available(): raise SystemExit('tmux backend not available on this machine') - print(f'\nLaunching with monitor={args.mode!r}\n') + print(f'\nLaunching with monitor={args.mode!r}, workers={args.workers}\n') - # The interesting line. Identical for any monitor mode — only the - # location of the UI changes. 
result = queue.run( block=True, monitor=args.mode, - onfail='kill', # tear down idle worker sessions on success + onfail='kill', other_session_handler='kill', ) From 1fae48d692ea0b523bfbb439eb756b1d8687e56e Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 18:54:57 +0000 Subject: [PATCH 09/27] Show failed-job log paths in done summary; note when logs are not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the queue finishes with failures, list each failing job by name along with its log file path (if log=True was passed). For any failed job that doesn't have a log on disk, emit a single hint that logs were not enabled — so the user knows where the gap is rather than seeing the same hint repeated per job. Hooked into the inline monitor path as well so all three monitor modes (inline, tmux, none) produce the same summary. Co-Authored-By: Claude Opus 4.7 --- cmd_queue/tmux_queue.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 7babb92..59afc5c 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -715,8 +715,7 @@ def run( with_textual=with_textual, ) - @staticmethod - def _print_done_summary(agg_state: Dict[str, Any]) -> None: + def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: from rich import print as rich_print failed = agg_state.get('failed', 0) passed = agg_state.get('passed', 0) @@ -733,6 +732,32 @@ def _print_done_summary(agg_state: Dict[str, Any]) -> None: f'skipped=[yellow]{skipped}[/yellow] ' f'total={total}' ) + if failed: + failed_jobs = [] + for worker in self.workers: + for job in getattr(worker, 'jobs', []): + fail_fpath = getattr(job, 'fail_fpath', None) + if fail_fpath is not None and fail_fpath.exists(): + failed_jobs.append(job) + if failed_jobs: + rich_print('[bold red]Failed jobs:[/bold red]') + any_log_missing = False + for job in failed_jobs: + 
log_fpath = getattr(job, 'log_fpath', None) + if (getattr(job, 'log', False) and log_fpath is not None + and log_fpath.exists()): + rich_print( + f' [red]{job.name}[/red] log: {log_fpath}' + ) + else: + any_log_missing = True + rich_print(f' [red]{job.name}[/red]') + if any_log_missing: + rich_print( + '[yellow]Note:[/yellow] failure logs are not ' + 'enabled for some jobs (pass log=True at ' + 'submit time to capture stdout/stderr to disk).' + ) def _dispatch_monitor( self, @@ -933,6 +958,7 @@ def monitor( self.capture() if onfail == 'kill' and not agg_state.get('failed'): self.kill() + self._print_done_summary(agg_state) return agg_state def _textual_monitor(self): From 642c11dee0b8ded8d69ac4ca1901a12b802412a4 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 18:58:06 +0000 Subject: [PATCH 10/27] Inject forced failures into tmux example to exercise the new summary Add --failures (default 1) to the tmux example so the failure summary and dependency-skip cascade are visible by default. The first N proc-* jobs exit non-zero, which causes their downstream merge/final jobs to be skipped. Pass --failures=0 for a clean run. Also enable log capture by default (--no-logs to disable) so the failed-job log paths printed by the new done-summary actually exist. Co-Authored-By: Claude Opus 4.7 --- examples/tmux_example.py | 71 ++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/examples/tmux_example.py b/examples/tmux_example.py index 38830f9..6fc501d 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -24,8 +24,12 @@ merge-Y (after proc-C + proc-D) (parallel, 3-4s) Level 4 (finalize): final (after both merges, 2s) +By default one of the proc jobs is forced to fail so the failure +summary (and dependency-skip cascade) is visible. Pass ``--failures=0`` +for a clean run, or higher numbers for more failures. 
+ CommandLine: - # Default: inline monitor (current shell) + # Default: inline monitor (current shell), one forced failure python ~/code/cmd_queue/examples/tmux_example.py # Spawn the monitor in its own tmux session and attach @@ -33,6 +37,9 @@ # Run silently and reattach manually with `cmd_queue monitor ` python ~/code/cmd_queue/examples/tmux_example.py --mode=none + + # Force a clean run (no injected failures) + python ~/code/cmd_queue/examples/tmux_example.py --failures=0 """ import argparse @@ -57,6 +64,16 @@ def main(): '--workers', type=int, default=4, help='Number of parallel tmux workers.', ) + parser.add_argument( + '--failures', type=int, default=1, + help='Number of proc-* jobs to force into failure (0-4). The ' + 'failures cascade: dependent merge/final jobs are skipped.', + ) + parser.add_argument( + '--no-logs', dest='logs', action='store_false', + help='Disable per-job log capture (default: enabled).', + ) + parser.set_defaults(logs=True) args = parser.parse_args() queue = cmd_queue.Queue.create( @@ -65,49 +82,67 @@ def main(): name=args.name, ) + proc_names = ['proc-A', 'proc-B', 'proc-C', 'proc-D'] + fail_set = set(proc_names[:max(0, min(args.failures, len(proc_names)))]) + + def proc_cmd(name: str, sleep: int) -> str: + body = f'echo "[{name}] start"; sleep {sleep}' + if name in fail_set: + return ( + f'{body}; echo "[{name}] FORCED FAILURE" >&2; ' + f'exit 1' + ) + return f'{body}; echo "[{name}] done"' + + submit_kw = {'log': args.logs} + # Level 1: four independent prep jobs — run fully in parallel. 
prep_a = queue.submit( - 'echo "[prep-A] start"; sleep 5; echo "[prep-A] done"', name='prep-A') + 'echo "[prep-A] start"; sleep 5; echo "[prep-A] done"', + name='prep-A', **submit_kw) prep_b = queue.submit( - 'echo "[prep-B] start"; sleep 7; echo "[prep-B] done"', name='prep-B') + 'echo "[prep-B] start"; sleep 7; echo "[prep-B] done"', + name='prep-B', **submit_kw) prep_c = queue.submit( - 'echo "[prep-C] start"; sleep 6; echo "[prep-C] done"', name='prep-C') + 'echo "[prep-C] start"; sleep 6; echo "[prep-C] done"', + name='prep-C', **submit_kw) prep_d = queue.submit( - 'echo "[prep-D] start"; sleep 8; echo "[prep-D] done"', name='prep-D') + 'echo "[prep-D] start"; sleep 8; echo "[prep-D] done"', + name='prep-D', **submit_kw) - # Level 2: each process job depends on exactly one prep job. + # Level 2: each process job depends on exactly one prep job; some + # may be forced to fail by --failures. proc_a = queue.submit( - 'echo "[proc-A] start"; sleep 3; echo "[proc-A] done"', - name='proc-A', depends=[prep_a]) + proc_cmd('proc-A', 3), name='proc-A', depends=[prep_a], **submit_kw) proc_b = queue.submit( - 'echo "[proc-B] start"; sleep 4; echo "[proc-B] done"', - name='proc-B', depends=[prep_b]) + proc_cmd('proc-B', 4), name='proc-B', depends=[prep_b], **submit_kw) proc_c = queue.submit( - 'echo "[proc-C] start"; sleep 5; echo "[proc-C] done"', - name='proc-C', depends=[prep_c]) + proc_cmd('proc-C', 5), name='proc-C', depends=[prep_c], **submit_kw) proc_d = queue.submit( - 'echo "[proc-D] start"; sleep 3; echo "[proc-D] done"', - name='proc-D', depends=[prep_d]) + proc_cmd('proc-D', 3), name='proc-D', depends=[prep_d], **submit_kw) # Level 3: two merge jobs, each waiting on a pair of proc jobs. 
merge_x = queue.submit( 'echo "[merge-X] start"; sleep 4; echo "[merge-X] done"', - name='merge-X', depends=[proc_a, proc_b]) + name='merge-X', depends=[proc_a, proc_b], **submit_kw) merge_y = queue.submit( 'echo "[merge-Y] start"; sleep 3; echo "[merge-Y] done"', - name='merge-Y', depends=[proc_c, proc_d]) + name='merge-Y', depends=[proc_c, proc_d], **submit_kw) # Level 4: single finalize job — the whole pipeline converges here. queue.submit( 'echo "[final] start"; sleep 2; echo "[final] done"', - name='final', depends=[merge_x, merge_y]) + name='final', depends=[merge_x, merge_y], **submit_kw) queue.print_graph() if not queue.is_available(): raise SystemExit('tmux backend not available on this machine') - print(f'\nLaunching with monitor={args.mode!r}, workers={args.workers}\n') + print( + f'\nLaunching with monitor={args.mode!r}, workers={args.workers}, ' + f'failures={args.failures}, logs={args.logs}\n' + ) result = queue.run( block=True, From 3e42fd3833ec1fec0992fde9ff88c354b8a1e0fc Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 19:00:14 +0000 Subject: [PATCH 11/27] Surface failed jobs (with log paths) inside the live monitor view Render a 'Failed jobs' table directly below the per-worker status table while the queue is still running, so failures are visible the moment they happen rather than only in the post-run summary. Each row shows the job name and its log path (or '(no log)' when log capture wasn't enabled); a one-line note reminds the user to pass log=True if any failed jobs lack a log on disk. 
Co-Authored-By: Claude Opus 4.7 --- cmd_queue/tmux_queue.py | 61 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 59afc5c..6806e0e 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -987,6 +987,57 @@ def _textual_monitor(self): self.kill() is_running = False + def _build_failed_jobs_renderable(self) -> Any: + """Renderable summary of currently-failed jobs, or None. + + Used by the live monitor to surface failures (and their log + paths, when available) as soon as they happen, rather than only + in the post-run summary. + """ + failed_jobs = [] + for worker in self.workers: + for job in getattr(worker, 'jobs', []): + fail_fpath = getattr(job, 'fail_fpath', None) + if fail_fpath is not None and fail_fpath.exists(): + failed_jobs.append(job) + if not failed_jobs: + return None + from rich.table import Table + from rich.console import Group + from rich.text import Text + ftable = Table( + title='Failed jobs', title_style='bold red', + show_header=True, header_style='red', + ) + ftable.add_column('name', style='red') + ftable.add_column('log') + any_log_missing = False + for job in failed_jobs: + log_fpath = getattr(job, 'log_fpath', None) + if (getattr(job, 'log', False) and log_fpath is not None + and log_fpath.exists()): + ftable.add_row(job.name, str(log_fpath)) + else: + any_log_missing = True + ftable.add_row(job.name, '[dim](no log)[/dim]') + if any_log_missing: + return Group( + ftable, + Text( + 'Note: failure logs are not enabled for some jobs ' + '(pass log=True at submit time).', + style='yellow', + ), + ) + return ftable + + def _build_live_renderable(self): + from rich.console import Group + table, finished, agg_state = self._build_status_table() + failed = self._build_failed_jobs_renderable() + renderable = Group(table, failed) if failed is not None else table + return renderable, finished, agg_state + def 
_simple_rich_monitor(self, refresh_rate=0.4): import time from rich.live import Live @@ -995,12 +1046,14 @@ def _simple_rich_monitor(self, refresh_rate=0.4): for command in self._kill_commands(): print(command) try: - table, finished, agg_state = self._build_status_table() - with Live(table, refresh_per_second=4) as live: + renderable, finished, agg_state = self._build_live_renderable() + with Live(renderable, refresh_per_second=4) as live: while not finished: time.sleep(refresh_rate) - table, finished, agg_state = self._build_status_table() - live.update(table) + renderable, finished, agg_state = ( + self._build_live_renderable() + ) + live.update(renderable) except KeyboardInterrupt: from rich.prompt import Confirm flag = Confirm.ask('do you to kill the procs?') From bc193da6d3eb667559107094cc9e9ca2cab7de84 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 19:05:04 +0000 Subject: [PATCH 12/27] Persist per-job info in monitor manifest so out-of-process monitor sees failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the monitor is rehydrated via cmd_queue monitor --manifest=..., the reconstructed workers had empty .jobs lists, so the failed-jobs panel and post-run summary couldn't surface any failing job names — even when fail markers existed on disk. Serialize each job's name, log flag, and fail/log paths into the manifest, and rebuild lightweight SimpleNamespace stubs on each reconstructed worker. Enough surface for the failure renderer; we don't need the full BashJob since the monitor never re-runs anything. 
Co-Authored-By: Claude Opus 4.7 --- cmd_queue/tmux_queue.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 6806e0e..2caac9b 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -1212,6 +1212,16 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: """Snapshot enough state for an out-of-process monitor to reattach.""" workers_info = [] for worker in self.workers: + jobs_info = [] + for job in getattr(worker, 'jobs', []): + fail_fpath = getattr(job, 'fail_fpath', None) + log_fpath = getattr(job, 'log_fpath', None) + jobs_info.append({ + 'name': getattr(job, 'name', None), + 'log': bool(getattr(job, 'log', False)), + 'fail_fpath': str(fail_fpath) if fail_fpath else None, + 'log_fpath': str(log_fpath) if log_fpath else None, + }) workers_info.append({ 'name': worker.name, 'rootid': worker.rootid, @@ -1220,6 +1230,7 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: 'state_fpath': str(worker.state_fpath), 'fpath': str(worker.fpath), 'environ': dict(worker.environ or {}), + 'jobs': jobs_info, }) return { 'backend': 'tmux', @@ -1262,15 +1273,29 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": self.job_info_dpath = self.dpath / 'job_info' self.preamble = [] self.jobs = [] - self.workers = [ - serial_queue.SerialQueue( + import types + workers = [] + for w in manifest.get('workers', []): + worker = serial_queue.SerialQueue( name=w['name'], rootid=w['rootid'], dpath=ub.Path(w['dpath']), environ=w.get('environ') or {}, ) - for w in manifest.get('workers', []) - ] + # Rehydrate lightweight job stubs so the monitor can show + # per-job failure rows. We don't need the full BashJob — only + # the attributes the failed-jobs renderer reads. 
+ stubs = [] + for j in w.get('jobs') or []: + stubs.append(types.SimpleNamespace( + name=j.get('name'), + log=bool(j.get('log', False)), + fail_fpath=ub.Path(j['fail_fpath']) if j.get('fail_fpath') else None, + log_fpath=ub.Path(j['log_fpath']) if j.get('log_fpath') else None, + )) + worker.jobs = stubs + workers.append(worker) + self.workers = workers return self From dd31e3b330999458ac4aa4159cc8c1f5ee8babb8 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 19:30:03 +0000 Subject: [PATCH 13/27] Stop double-marking skipped jobs as failed; render skipped section in monitor The bash boilerplate generated by BashJob.finalize_text ran an unconditional if-RC-0-on_pass-else-on_fail after the deps-check, so a skipped job (RC=126, on_skip already ran) ALSO had fail_fpath written and NUM_FAILED incremented. The status agg therefore showed a skipped+failed double-count. Fix: * Add a skip_fpath marker (printed by the on_skip block). * Make the post-RC dispatch 3-way: on_pass for RC=0, no-op for RC=126, on_fail otherwise. Monitor: * Carry skip_fpath and dependency names in the rehydration manifest. * Replace single failed panel with Failed + Skipped tables; skipped rows show a reason like dep X failed. * Same split applied to the post-run summary. Update tests/test_bash_variants.py: the prior test asserted the bug. 
Co-Authored-By: Claude Opus 4.7 --- cmd_queue/serial_queue.py | 11 ++- cmd_queue/tmux_queue.py | 188 +++++++++++++++++++++++++----------- tests/test_bash_variants.py | 7 +- 3 files changed, 148 insertions(+), 58 deletions(-) diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index cfcf09a..2b71619 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -119,6 +119,7 @@ def __init__( self.info_dpath = info_dpath self.pass_fpath = self.info_dpath / f'passed/{self.pathid}.pass' self.fail_fpath = self.info_dpath / f'failed/{self.pathid}.fail' + self.skip_fpath = self.info_dpath / f'skipped/{self.pathid}.skip' self.stat_fpath = self.info_dpath / f'status/{self.pathid}.stat' self.log_fpath = self.info_dpath / f'status/{self.pathid}.logs' self.tags = util_tags.Tags.coerce(tags) @@ -172,7 +173,10 @@ def finalize_text( f'printf "fail" > {self.fail_fpath}', ], # when dependencies are unmet - 'on_skip': [ ] + 'on_skip': [ + f'mkdir -p {self.skip_fpath.parent}', + f'printf "skip" > {self.skip_fpath}', + ] } # Append custom conditionals @@ -326,9 +330,14 @@ def finalize_text( on_pass_part = indent(_job_conditionals['on_pass']) on_fail_part = indent(_job_conditionals['on_fail']) + # RETURN_CODE=126 means dependencies were unmet; on_skip + # already ran in the deps-failed branch above, so we don't + # want to also mark the job as failed here. 
conditional_body = '\n'.join([ 'if [[ "$RETURN_CODE" == "0" ]]; then', on_pass_part, + 'elif [[ "$RETURN_CODE" == "126" ]]; then', + ' : # job was skipped; on_skip already handled', 'else', on_fail_part, 'fi' diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 2caac9b..3386587 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -732,32 +732,38 @@ def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: f'skipped=[yellow]{skipped}[/yellow] ' f'total={total}' ) - if failed: - failed_jobs = [] - for worker in self.workers: - for job in getattr(worker, 'jobs', []): - fail_fpath = getattr(job, 'fail_fpath', None) - if fail_fpath is not None and fail_fpath.exists(): - failed_jobs.append(job) - if failed_jobs: - rich_print('[bold red]Failed jobs:[/bold red]') - any_log_missing = False - for job in failed_jobs: - log_fpath = getattr(job, 'log_fpath', None) - if (getattr(job, 'log', False) and log_fpath is not None - and log_fpath.exists()): - rich_print( - f' [red]{job.name}[/red] log: {log_fpath}' - ) - else: - any_log_missing = True - rich_print(f' [red]{job.name}[/red]') - if any_log_missing: + failed_jobs, skipped_jobs, status_by_name = ( + self._collect_failed_and_skipped() + ) + if failed_jobs: + rich_print('[bold red]Failed jobs:[/bold red]') + any_log_missing = False + for job in failed_jobs: + log_fpath = getattr(job, 'log_fpath', None) + if (getattr(job, 'log', False) and log_fpath is not None + and log_fpath.exists()): rich_print( - '[yellow]Note:[/yellow] failure logs are not ' - 'enabled for some jobs (pass log=True at ' - 'submit time to capture stdout/stderr to disk).' + f' [red]{job.name}[/red] log: {log_fpath}' ) + else: + any_log_missing = True + rich_print(f' [red]{job.name}[/red] [dim](no log)[/dim]') + if any_log_missing: + rich_print( + '[yellow]Note:[/yellow] failure logs are not ' + 'enabled for some failed jobs (pass log=True at ' + 'submit time to capture stdout/stderr to disk).' 
+ ) + if skipped_jobs: + rich_print('[bold yellow]Skipped jobs:[/bold yellow]') + for job in skipped_jobs: + reason = self._skip_reason(job, status_by_name) + if reason: + rich_print( + f' [yellow]{job.name}[/yellow] ({reason})' + ) + else: + rich_print(f' [yellow]{job.name}[/yellow]') def _dispatch_monitor( self, @@ -987,49 +993,111 @@ def _textual_monitor(self): self.kill() is_running = False - def _build_failed_jobs_renderable(self) -> Any: - """Renderable summary of currently-failed jobs, or None. + def _collect_failed_and_skipped(self): + """Walk worker.jobs and partition into failed / skipped lists. - Used by the live monitor to surface failures (and their log - paths, when available) as soon as they happen, rather than only - in the post-run summary. + A job is *failed* if its fail_fpath exists, and *skipped* if its + skip_fpath exists. The two are mutually exclusive: the bash + boilerplate writes one or the other but never both. """ - failed_jobs = [] + failed = [] + skipped = [] + # Map job name -> status so we can fill in skip reasons. + status_by_name: Dict[str, str] = {} for worker in self.workers: for job in getattr(worker, 'jobs', []): fail_fpath = getattr(job, 'fail_fpath', None) + skip_fpath = getattr(job, 'skip_fpath', None) if fail_fpath is not None and fail_fpath.exists(): - failed_jobs.append(job) - if not failed_jobs: + failed.append(job) + if getattr(job, 'name', None): + status_by_name[job.name] = 'failed' + elif skip_fpath is not None and skip_fpath.exists(): + skipped.append(job) + if getattr(job, 'name', None): + status_by_name[job.name] = 'skipped' + return failed, skipped, status_by_name + + @staticmethod + def _skip_reason(job: Any, status_by_name: Dict[str, str]) -> str: + """Best-effort explanation of why a job was skipped. + + Looks at the job's recorded dependency names and reports the + first one whose status is not 'passed'. Returns a short string + like 'dep proc-A failed' or '' if no clear reason. 
+ """ + depends = getattr(job, 'depends', None) or [] + bad = [] + for dep_name in depends: + if not dep_name: + continue + st = status_by_name.get(dep_name) + if st in ('failed', 'skipped'): + bad.append((dep_name, st)) + if not bad: + return '' + if len(bad) == 1: + name, st = bad[0] + return f'dep {name} {st}' + names = ', '.join(f'{n} {s}' for n, s in bad) + return f'deps: {names}' + + def _build_failed_jobs_renderable(self) -> Any: + """Renderable summary of failed and skipped jobs, or None. + + Used by the live monitor to surface failures and skips (and the + reason for each skip) as soon as they happen, rather than only + in the post-run summary. + """ + failed, skipped, status_by_name = self._collect_failed_and_skipped() + if not failed and not skipped: return None from rich.table import Table from rich.console import Group from rich.text import Text - ftable = Table( - title='Failed jobs', title_style='bold red', - show_header=True, header_style='red', - ) - ftable.add_column('name', style='red') - ftable.add_column('log') + + renderables = [] any_log_missing = False - for job in failed_jobs: - log_fpath = getattr(job, 'log_fpath', None) - if (getattr(job, 'log', False) and log_fpath is not None - and log_fpath.exists()): - ftable.add_row(job.name, str(log_fpath)) - else: - any_log_missing = True - ftable.add_row(job.name, '[dim](no log)[/dim]') - if any_log_missing: - return Group( - ftable, - Text( - 'Note: failure logs are not enabled for some jobs ' - '(pass log=True at submit time).', - style='yellow', - ), + + if failed: + ftable = Table( + title='Failed jobs', title_style='bold red', + show_header=True, header_style='red', + ) + ftable.add_column('name', style='red') + ftable.add_column('log') + for job in failed: + log_fpath = getattr(job, 'log_fpath', None) + if (getattr(job, 'log', False) and log_fpath is not None + and log_fpath.exists()): + ftable.add_row(job.name, str(log_fpath)) + else: + any_log_missing = True + ftable.add_row(job.name, 
'[dim](no log)[/dim]') + renderables.append(ftable) + + if skipped: + stable = Table( + title='Skipped jobs', title_style='bold yellow', + show_header=True, header_style='yellow', ) - return ftable + stable.add_column('name', style='yellow') + stable.add_column('reason') + for job in skipped: + reason = self._skip_reason(job, status_by_name) + stable.add_row(job.name, reason or '[dim](unknown)[/dim]') + renderables.append(stable) + + if any_log_missing: + renderables.append(Text( + 'Note: failure logs are not enabled for some failed ' + 'jobs (pass log=True at submit time).', + style='yellow', + )) + + if len(renderables) == 1: + return renderables[0] + return Group(*renderables) def _build_live_renderable(self): from rich.console import Group @@ -1215,12 +1283,20 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: jobs_info = [] for job in getattr(worker, 'jobs', []): fail_fpath = getattr(job, 'fail_fpath', None) + skip_fpath = getattr(job, 'skip_fpath', None) log_fpath = getattr(job, 'log_fpath', None) + depends = getattr(job, 'depends', None) or [] + depends_names = [ + getattr(d, 'name', None) for d in depends + if d is not None and getattr(d, 'name', None) + ] jobs_info.append({ 'name': getattr(job, 'name', None), 'log': bool(getattr(job, 'log', False)), 'fail_fpath': str(fail_fpath) if fail_fpath else None, + 'skip_fpath': str(skip_fpath) if skip_fpath else None, 'log_fpath': str(log_fpath) if log_fpath else None, + 'depends': depends_names, }) workers_info.append({ 'name': worker.name, @@ -1291,7 +1367,9 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": name=j.get('name'), log=bool(j.get('log', False)), fail_fpath=ub.Path(j['fail_fpath']) if j.get('fail_fpath') else None, + skip_fpath=ub.Path(j['skip_fpath']) if j.get('skip_fpath') else None, log_fpath=ub.Path(j['log_fpath']) if j.get('log_fpath') else None, + depends=list(j.get('depends') or []), )) worker.jobs = stubs workers.append(worker) diff --git 
a/tests/test_bash_variants.py b/tests/test_bash_variants.py index 8b2d2fc..cc26956 100644 --- a/tests/test_bash_variants.py +++ b/tests/test_bash_variants.py @@ -329,6 +329,7 @@ def test_bashjob_exec_depends_unmet_skips(): job.stat_fpath = tmp_path / "job2.status.json" job.pass_fpath = tmp_path / "job2.pass" job.fail_fpath = tmp_path / "job2.fail" + job.skip_fpath = tmp_path / "job2.skip" text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -343,8 +344,10 @@ def test_bashjob_exec_depends_unmet_skips(): ) assert not outfile.exists(), "command should not run if dependency is unmet" - # With current semantics, skip sets RETURN_CODE=126, which counts as fail - assert job.fail_fpath.exists(), "skipped job should be marked as fail (ret=126)" + # Skipped jobs (deps unmet, RC=126) write skip_fpath only — they + # are NOT also marked as failed. + assert job.skip_fpath.exists(), "skipped job should be marked as skip" + assert not job.fail_fpath.exists(), "skipped job should not be marked as fail" assert not job.pass_fpath.exists() status = kwutil.Json.load(job.stat_fpath) From 435ceab8d3bae52706daa0b5588b421db4f20af7 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:21:19 -0400 Subject: [PATCH 14/27] manual example updates --- cmd_queue/util/util_tmux.py | 10 +-- examples/tmux_example.py | 166 +++++++++++++++++++++--------------- 2 files changed, 103 insertions(+), 73 deletions(-) diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index ce2d8e7..4ce765b 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -167,11 +167,11 @@ def block_with_attach_prompt( f'tmux switch-client -t {session_name}' if inside_tmux else f'tmux attach -t {session_name}' ) - print( - f'Watching {label}. Press [a] to attach to monitor session ' - f'({session_name}), [q] to stop watching (queue keeps running).' 
- ) - print(f'Manual reattach anytime from another shell: {attach_cmd}') + print(f'Watching {label}.') + import rich + rich.print(rf'[bold]Press \[a][/bold] to attach to monitor session ({session_name})') + rich.print(r'[bold]Press \[q][/bold] to stop watching (queue keeps running).') + print(f'Manual reattach anytime from another shell:\n{attach_cmd}') fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) diff --git a/examples/tmux_example.py b/examples/tmux_example.py index 6fc501d..c5b5cb4 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -16,7 +16,9 @@ jobs finish. Useful in non-interactive scripts. The reattach hint is still printed so a human can attach via ``cmd_queue monitor``. -The job DAG has four levels and shows meaningful parallel execution: +The job DAG has four logical levels and shows meaningful parallel execution. +Each logical job is split into a serial chain of smaller one-second jobs. +This creates more queue jobs while keeping the total runtime roughly the same. Level 1 (prep): prep-A prep-B prep-C prep-D (parallel, 5-8s) Level 2 (process): proc-A proc-B proc-C proc-D (each after one prep, 3-5s) @@ -41,40 +43,36 @@ # Force a clean run (no injected failures) python ~/code/cmd_queue/examples/tmux_example.py --failures=0 """ -import argparse +import ubelt as ub +import scriptconfig as scfg + + +class TmuxExampleConfig(scfg.DataConfig): + """ + Automatically created module for IPython interactive environment + """ + mode = scfg.Value('tmux', help='Where the monitor UI runs.', choices=['inline', 'tmux', 'none']) + name = scfg.Value('tmux-example', help=ub.paragraph( + ''' + Queue name; also doubles as the lookup key for `cmd_queue + monitor `. + ''')) + workers = scfg.Value(4, type=int, help='Number of parallel tmux workers.') + failures = scfg.Value(6, type=int, help=ub.paragraph( + ''' + Number of proc-* logical jobs to force into failure (0-4). + The failures cascade: dependent merge/final jobs are skipped. 
+ ''')) + logs = scfg.Value(True, isflag=True, help=ub.paragraph( + ''' + Set to False to disable per-job log capture (default: enabled). + ''')) def main(): import cmd_queue - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - '--mode', - choices=['inline', 'tmux', 'none'], - default='tmux', - help='Where the monitor UI runs.', - ) - parser.add_argument( - '--name', - default='tmux-example', - help='Queue name; also doubles as the lookup key for ' - '`cmd_queue monitor `.', - ) - parser.add_argument( - '--workers', type=int, default=4, - help='Number of parallel tmux workers.', - ) - parser.add_argument( - '--failures', type=int, default=1, - help='Number of proc-* jobs to force into failure (0-4). The ' - 'failures cascade: dependent merge/final jobs are skipped.', - ) - parser.add_argument( - '--no-logs', dest='logs', action='store_false', - help='Disable per-job log capture (default: enabled).', - ) - parser.set_defaults(logs=True) - args = parser.parse_args() + args = TmuxExampleConfig.cli() queue = cmd_queue.Queue.create( backend='tmux', @@ -85,54 +83,86 @@ def main(): proc_names = ['proc-A', 'proc-B', 'proc-C', 'proc-D'] fail_set = set(proc_names[:max(0, min(args.failures, len(proc_names)))]) - def proc_cmd(name: str, sleep: int) -> str: - body = f'echo "[{name}] start"; sleep {sleep}' - if name in fail_set: - return ( - f'{body}; echo "[{name}] FORCED FAILURE" >&2; ' - f'exit 1' + submit_kw = {'log': args.logs} + + def submit_sleep_chain(base_name, total_sleep, depends=None, fail=False): + """ + Submit a logical sleep job as a chain of smaller queue jobs. + + This keeps the logical runtime roughly equal to ``total_sleep``, + but gives the tmux monitor more individual jobs to display. 
+ + Example: + ``submit_sleep_chain('prep-A', 5)`` creates: + + prep-A-01 -> prep-A-02 -> prep-A-03 -> prep-A-04 -> prep-A-05 + + Each part sleeps for one second, so the total duration is still + about five seconds, plus a small amount of scheduling overhead. + """ + if total_sleep <= 0: + raise ValueError('total_sleep must be positive') + + prev_depends = list(depends or []) + last_job = None + + for idx in range(total_sleep): + part = idx + 1 + name = f'{base_name}-{part:02d}' + is_final_part = part == total_sleep + + cmd = ( + f'echo "[{name}] start"; ' + f'sleep 1; ' ) - return f'{body}; echo "[{name}] done"' - submit_kw = {'log': args.logs} + if is_final_part and fail: + cmd += ( + f'echo "[{base_name}] FORCED FAILURE" >&2; ' + f'exit 1' + ) + elif is_final_part: + cmd += f'echo "[{base_name}] done"' + else: + cmd += f'echo "[{name}] done"' + + last_job = queue.submit( + cmd, + name=name, + depends=prev_depends, + **submit_kw, + ) + prev_depends = [last_job] + + return last_job # Level 1: four independent prep jobs — run fully in parallel. - prep_a = queue.submit( - 'echo "[prep-A] start"; sleep 5; echo "[prep-A] done"', - name='prep-A', **submit_kw) - prep_b = queue.submit( - 'echo "[prep-B] start"; sleep 7; echo "[prep-B] done"', - name='prep-B', **submit_kw) - prep_c = queue.submit( - 'echo "[prep-C] start"; sleep 6; echo "[prep-C] done"', - name='prep-C', **submit_kw) - prep_d = queue.submit( - 'echo "[prep-D] start"; sleep 8; echo "[prep-D] done"', - name='prep-D', **submit_kw) + # Each logical prep job is split into a serial chain of smaller jobs. + prep_a = submit_sleep_chain('prep-A', 5) + prep_b = submit_sleep_chain('prep-B', 7) + prep_c = submit_sleep_chain('prep-C', 6) + prep_d = submit_sleep_chain('prep-D', 8) # Level 2: each process job depends on exactly one prep job; some # may be forced to fail by --failures. 
- proc_a = queue.submit( - proc_cmd('proc-A', 3), name='proc-A', depends=[prep_a], **submit_kw) - proc_b = queue.submit( - proc_cmd('proc-B', 4), name='proc-B', depends=[prep_b], **submit_kw) - proc_c = queue.submit( - proc_cmd('proc-C', 5), name='proc-C', depends=[prep_c], **submit_kw) - proc_d = queue.submit( - proc_cmd('proc-D', 3), name='proc-D', depends=[prep_d], **submit_kw) + proc_a = submit_sleep_chain( + 'proc-A', 3, depends=[prep_a], fail='proc-A' in fail_set) + proc_b = submit_sleep_chain( + 'proc-B', 4, depends=[prep_b], fail='proc-B' in fail_set) + proc_c = submit_sleep_chain( + 'proc-C', 5, depends=[prep_c], fail='proc-C' in fail_set) + proc_d = submit_sleep_chain( + 'proc-D', 3, depends=[prep_d], fail='proc-D' in fail_set) # Level 3: two merge jobs, each waiting on a pair of proc jobs. - merge_x = queue.submit( - 'echo "[merge-X] start"; sleep 4; echo "[merge-X] done"', - name='merge-X', depends=[proc_a, proc_b], **submit_kw) - merge_y = queue.submit( - 'echo "[merge-Y] start"; sleep 3; echo "[merge-Y] done"', - name='merge-Y', depends=[proc_c, proc_d], **submit_kw) + merge_x = submit_sleep_chain( + 'merge-X', 4, depends=[proc_a, proc_b]) + merge_y = submit_sleep_chain( + 'merge-Y', 3, depends=[proc_c, proc_d]) # Level 4: single finalize job — the whole pipeline converges here. 
- queue.submit( - 'echo "[final] start"; sleep 2; echo "[final] done"', - name='final', depends=[merge_x, merge_y], **submit_kw) + submit_sleep_chain( + 'final', 2, depends=[merge_x, merge_y]) queue.print_graph() @@ -148,7 +178,7 @@ def proc_cmd(name: str, sleep: int) -> str: block=True, monitor=args.mode, onfail='kill', - other_session_handler='kill', + other_session_handler='auto', ) print(f'\nrun() returned: {result}') From 60837a0c1aa7baaa71fa6b596693db03c5293c90 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:22:00 -0400 Subject: [PATCH 15/27] Add type and ruff configs --- pyproject.toml | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f91884..cd12105 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,6 @@ [build-system] requires = [ "setuptools>=41.0.1", "wheel>=0.37.1",] -[tool.mypy] -ignore_missing_imports = true - [tool.xcookie] tags = [ "kitware", "purepy", "gitlab",] mod_name = "cmd_queue" @@ -40,3 +37,33 @@ skip = ['./docs/build', './*.egg-info', './build', './htmlcov'] count = true quiet-level = 3 ignore-words-list = ['wont', 'cant', 'ANS', 'doesnt', 'arent', 'ans', 'thats', 'datas', 'isnt'] + + +[tool.mypy] +ignore_missing_imports = true +#ignore_errors = true + +[tool.ty.rules] +unused-ignore-comment = "ignore" +unused-type-ignore-comment = "ignore" +unresolved-import = "ignore" + +[tool.ruff] +line-length = 80 +target-version = "py38" + +[tool.ruff.lint] +# Enable Flake8 (E, F) and isort (I) rules. +select = ["E", "F", "I"] +# Ignore specific rules, for example, E501 (line too long) as it's handled by the formatter. 
+ignore = [ + "E501", # line too long + "E402", # Module level import not at top of file +] + +[tool.ruff.format] +quote-style = "single" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = false From 45fa37022101c4b694f88884603028aa3e9b61a2 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:29:30 -0400 Subject: [PATCH 16/27] Update tests. Drop 3.9 support --- pyproject.toml | 4 ++-- requirements/tests.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cd12105..d0ec669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ repo_name = "cmd_queue" author = "Kitware Inc., Jon Crall" author_email = "kitware@kitware.com, jon.crall@kitware.com" description = "The cmd_queue module for a DAG of bash commands" -min_python = "3.9" +min_python = "3.10" url = "https://gitlab.kitware.com/computer-vision/cmd_queue" license = "Apache 2" dev_status = "beta" @@ -50,7 +50,7 @@ unresolved-import = "ignore" [tool.ruff] line-length = 80 -target-version = "py38" +target-version = "py310" [tool.ruff.lint] # Enable Flake8 (E, F) and isort (I) rules. 
diff --git a/requirements/tests.txt b/requirements/tests.txt index bf7acfe..260e09f 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -10,7 +10,7 @@ xdoctest >= 1.1.5 pytest-cov>=4.1.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ pytest-cov>=3.0.0 ; python_version < '3.11' # -coverage>=7.0.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ -coverage>=6.1.2 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 -coverage>=6.1.1 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 +coverage>=7.3.0 ; python_version < '4.0' and python_version >= '3.12' # Python 3.12 +coverage>=6.1.1 ; python_version < '3.12' and python_version >= '3.10' # Python 3.10-3.11 +coverage>=5.3.1 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 coverage>=6.1.1 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 From 7bdf8acee06740d8eeb937bd99ace1148f447e50 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:29:56 -0400 Subject: [PATCH 17/27] Update xcookie --- .gitlab-ci.yml | 188 +++++++++---------- dev/setup_secrets.sh | 424 +++++++++++++++++++++++++++++++++++++++---- docs/source/conf.py | 197 +++++++++++++------- setup.py | 186 ++++++++++--------- 4 files changed, 712 insertions(+), 283 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3e7a18b..1026d4a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,30 +94,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += 
list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -164,30 +170,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -234,30 +246,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -304,30 +322,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -359,29 +383,6 @@ test/sdist/minimal-loose/cp314-linux-x86_64: image: python:3.14 needs: - build/sdist -build/cp39-linux-x86_64: - <<: *build_wheel_template - image: python:3.9 -test/minimal-loose/cp39-linux-x86_64: - <<: *test_minimal-loose_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/full-loose/cp39-linux-x86_64: - <<: *test_full-loose_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/minimal-strict/cp39-linux-x86_64: - <<: *test_minimal-strict_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/full-strict/cp39-linux-x86_64: - <<: *test_full-strict_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 build/cp310-linux-x86_64: <<: *build_wheel_template image: python:3.10 @@ -507,6 +508,9 @@ lint: - python -m pip install pip uv -U - python -m uv pip install -r requirements/linting.txt - ./run_linter.sh + - python -m pip install ty + - pip install -r requirements/runtime.txt + - ty check ./cmd_queue allow_failure: true gpgsign/wheels: <<: *common_template @@ -531,17 +535,16 @@ gpgsign/wheels: - export GPG_EXECUTABLE=gpg - export GPG_KEYID=$(cat dev/public_gpg_key) - echo "GPG_KEYID = $GPG_KEYID" - # Decrypt and import GPG Keys / trust - # note the variable pointed to by VARNAME_CI_SECRET is a protected variables only available on main and release branch - - source dev/secrets_configuration.sh - - CI_SECRET=${!VARNAME_CI_SECRET} - $GPG_EXECUTABLE --version - openssl version - $GPG_EXECUTABLE --list-keys - # note CI_KITWARE_SECRET is a protected variables only available on main and release branch - - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import - - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust - - CIS=$CI_SECRET 
openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import + - echo "Importing GPG keys from CI secrets" + - printf '%s' "$GPG_PUBLIC_KEY_B64" | base64 -d | $GPG_EXECUTABLE --import + - printf '%s' "$GPG_OWNER_TRUST_B64" | base64 -d | $GPG_EXECUTABLE --import-ownertrust + - printf '%s' "$GPG_SECRET_SIGNING_SUBKEY_B64" | base64 -d | $GPG_EXECUTABLE --import + - "IMPORTED_FPR=$($GPG_EXECUTABLE --list-keys --with-colons \"$GPG_KEYID\" | awk -F: '/^fpr/ { print $10; exit }')" + - '[[ "$IMPORTED_FPR" == "$GPG_KEYID" ]] || { echo "ERROR: fingerprint mismatch: $IMPORTED_FPR != $GPG_KEYID"; exit 1; }' + - 'echo "GPG fingerprint verified: $IMPORTED_FPR"' - GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user $GPG_KEYID" - |- WHEEL_PATHS=(dist/*.whl dist/*.tar.gz) @@ -562,8 +565,6 @@ gpgsign/wheels: needs: - job: build/sdist artifacts: true - - job: build/cp39-linux-x86_64 - artifacts: true - job: build/cp310-linux-x86_64 artifacts: true - job: build/cp311-linux-x86_64 @@ -605,7 +606,6 @@ deploy/wheels: # do sed twice to handle the case of https clone with and without a read token URL_HOST=$(git remote get-url origin | sed -e 's|https\?://.*@||g' | sed -e 's|https\?://||g' | sed -e 's|git@||g' | sed -e 's|:|/|g') source dev/secrets_configuration.sh - CI_SECRET=${!VARNAME_CI_SECRET} PUSH_TOKEN=${!VARNAME_PUSH_TOKEN} echo "URL_HOST = $URL_HOST" # A git config user name and email is required. Set if needed. 
diff --git a/dev/setup_secrets.sh b/dev/setup_secrets.sh index 0a8efc9..ee607ab 100644 --- a/dev/setup_secrets.sh +++ b/dev/setup_secrets.sh @@ -139,6 +139,8 @@ setup_package_environs_github_erotemic(){ export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" + export GITHUB_ENVIRONMENT_PYPI="pypi" + export GITHUB_ENVIRONMENT_TESTPYPI="testpypi" export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh @@ -151,6 +153,8 @@ setup_package_environs_github_pyutils(){ export VARNAME_TWINE_PASSWORD="PYUTILS_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="PYUTILS_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="PYUTILS_PYPI_MASTER_TOKEN_USERNAME" + export GITHUB_ENVIRONMENT_PYPI="pypi" + export GITHUB_ENVIRONMENT_TESTPYPI="testpypi" export VARNAME_TEST_TWINE_USERNAME="PYUTILS_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=PyUtils-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh @@ -162,21 +166,138 @@ setup_package_environs_github_pyutils(){ #' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh } +resolve_secret_value_from_varname_ptr(){ + local secret_varname_ptr="$1" + local secret_name="$2" + local secret_varname="${!secret_varname_ptr}" + if [[ "$secret_varname" == "" ]]; then + echo "Skipping $secret_name because $secret_varname_ptr is unset" >&2 + return 1 + fi + local secret_value="${!secret_varname}" + if [[ "$secret_value" == "" ]]; then + echo "Skipping $secret_name because $secret_varname is unset or empty" >&2 + return 1 + fi + printf '%s' 
"$secret_value" +} + +upload_one_github_secret(){ + local secret_name="$1" + local secret_value="$2" + local environment_name="${3:-}" + if [[ "$environment_name" == "" ]]; then + gh secret set "$secret_name" -b"$secret_value" + else + gh secret set "$secret_name" --env "$environment_name" -b"$secret_value" + fi +} + +github_repo_full_name(){ + local remote_url + remote_url="$(git remote get-url origin)" + if [[ "$remote_url" == git@github.com:* ]]; then + printf '%s' "${remote_url#git@github.com:}" | sed 's/\.git$//' + elif [[ "$remote_url" == https://github.com/* ]]; then + printf '%s' "${remote_url#https://github.com/}" | sed 's/\.git$//' + else + echo "Unable to determine GitHub repo from origin: $remote_url" >&2 + return 1 + fi +} + +ensure_github_environment(){ + local environment_name="$1" + local repo_full_name + repo_full_name="$(github_repo_full_name)" || return 1 + gh api --method PUT \ + -H "Accept: application/vnd.github+json" \ + "/repos/${repo_full_name}/environments/${environment_name}" >/dev/null +} + +setup_github_release_environments(){ + source dev/secrets_configuration.sh + local repo_full_name + local pypi_env + local testpypi_env + repo_full_name="$(github_repo_full_name)" || return 1 + pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + + ensure_github_environment "$testpypi_env" + ensure_github_environment "$pypi_env" + + echo "Ensured GitHub environments exist:" + echo " - $testpypi_env" + echo " - $pypi_env" + echo "Review environment protection rules manually as needed:" + echo " https://github.com/${repo_full_name}/settings/environments" + echo "Suggested policy:" + echo " - ${testpypi_env}: usually no approval required" + echo " - ${pypi_env}: require approval / reviewers and restrict to release refs" +} + upload_github_secrets(){ + local mode="${1:-legacy}" load_secrets unset GITHUB_TOKEN #printf "%s" "$GITHUB_TOKEN" | gh auth login --hostname Github.com --with-token if ! 
gh auth status ; then gh auth login fi + local secret_value + local pypi_env + local testpypi_env source dev/secrets_configuration.sh - gh secret set "TWINE_USERNAME" -b"${!VARNAME_TWINE_USERNAME}" - gh secret set "TEST_TWINE_USERNAME" -b"${!VARNAME_TEST_TWINE_USERNAME}" - toggle_setx_enter - gh secret set "CI_SECRET" -b"${!VARNAME_CI_SECRET}" - gh secret set "TWINE_PASSWORD" -b"${!VARNAME_TWINE_PASSWORD}" - gh secret set "TEST_TWINE_PASSWORD" -b"${!VARNAME_TEST_TWINE_PASSWORD}" - toggle_setx_exit + + if [[ "$mode" == "trusted_publishing" ]]; then + pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + setup_github_release_environments + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_CI_SECRET CI_SECRET) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "CI_SECRET" "$secret_value" "$pypi_env" + upload_one_github_secret "CI_SECRET" "$secret_value" "$testpypi_env" + fi + toggle_setx_exit + elif [[ "$mode" == "direct_gpg" ]]; then + # direct_ci GPG transport + non-trusted publishing. + # GPG material is already uploaded by upload_github_gpg_secrets. + # Upload Twine credentials environment-scoped (live password to pypi + # env, test password to testpypi env). CI_SECRET is not uploaded. 
+ pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + setup_github_release_environments + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_USERNAME TWINE_USERNAME) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TWINE_USERNAME" "$secret_value" "$pypi_env" + upload_one_github_secret "TWINE_USERNAME" "$secret_value" "$testpypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_USERNAME TEST_TWINE_USERNAME) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TEST_TWINE_USERNAME" "$secret_value" "$testpypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_PASSWORD TWINE_PASSWORD) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TWINE_PASSWORD" "$secret_value" "$pypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_PASSWORD TEST_TWINE_PASSWORD) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TEST_TWINE_PASSWORD" "$secret_value" "$testpypi_env" + fi + toggle_setx_exit + else + # Legacy mode: all secrets repo-level, CI_SECRET included. 
+ secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_USERNAME TWINE_USERNAME) && upload_one_github_secret "TWINE_USERNAME" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_USERNAME TEST_TWINE_USERNAME) && upload_one_github_secret "TEST_TWINE_USERNAME" "$secret_value" + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_CI_SECRET CI_SECRET) && upload_one_github_secret "CI_SECRET" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_PASSWORD TWINE_PASSWORD) && upload_one_github_secret "TWINE_PASSWORD" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_PASSWORD TEST_TWINE_PASSWORD) && upload_one_github_secret "TEST_TWINE_PASSWORD" "$secret_value" + toggle_setx_exit + fi } @@ -224,15 +345,15 @@ upload_gitlab_group_secrets(){ fi TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" < "$TMP_DIR/group_info" jq # Get group-level secret variables - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" > "$TMP_DIR/group_vars" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" > "$TMP_DIR/group_vars" < "$TMP_DIR/group_vars" jq '.[] | .key' if [[ "$?" 
!= "0" ]]; then @@ -260,20 +381,26 @@ upload_gitlab_group_secrets(){ echo "Remove variable does not exist, posting" toggle_setx_enter - curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ - --form "key=${SECRET_VARNAME}" \ - --form "value=${LOCAL_VALUE}" \ - --form "protected=true" \ - --form "masked=true" \ - --form "environment_scope=*" \ - --form "variable_type=env_var" + curl --fail --silent --show-error \ + --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ + --form "key=${SECRET_VARNAME}" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" toggle_setx_exit elif [[ "$REMOTE_VALUE" != "$LOCAL_VALUE" ]]; then echo "Remove variable does not agree, putting" # Update variable value toggle_setx_enter - curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ - --form "value=${LOCAL_VALUE}" + curl --fail --silent --show-error \ + --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" toggle_setx_exit else echo "Remote value agrees with local" @@ -305,13 +432,13 @@ upload_gitlab_repo_secrets(){ TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". 
| map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") < "$TMP_DIR/group_info" jq @@ -321,16 +448,25 @@ upload_gitlab_repo_secrets(){ # Get group-level secret variables toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" toggle_setx_exit < "$TMP_DIR/project_vars" jq '.[] | .key' if [[ "$?" != "0" ]]; then echo "Failed to access project level variables. Probably a permission issue" fi + local mode="${1:-legacy}" + LIVE_MODE=1 source dev/secrets_configuration.sh - SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + if [[ "$mode" == "direct_gpg" ]]; then + # In direct_ci transport mode the GPG key material is uploaded as + # project-level secrets by upload_gitlab_gpg_secrets; CI_SECRET is not + # needed. Only Twine and push-token secrets are uploaded here. 
+ SECRET_VARNAME_ARR=(VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + else + SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + fi for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" @@ -349,13 +485,16 @@ upload_gitlab_repo_secrets(){ # New variable echo "Remove variable does not exist, posting" if [[ "$LIVE_MODE" == "1" ]]; then - curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" \ - --form "key=${SECRET_VARNAME}" \ - --form "value=${LOCAL_VALUE}" \ - --form "protected=true" \ - --form "masked=true" \ - --form "environment_scope=*" \ - --form "variable_type=env_var" + curl --fail --silent --show-error \ + --request POST \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables" \ + --form "key=${SECRET_VARNAME}" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" else echo "dry run, not posting" fi @@ -363,8 +502,15 @@ upload_gitlab_repo_secrets(){ echo "Remove variable does not agree, putting" # Update variable value if [[ "$LIVE_MODE" == "1" ]]; then - curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables/$SECRET_VARNAME" \ - --form "value=${LOCAL_VALUE}" + curl --fail --silent --show-error \ + --request PUT \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$SECRET_VARNAME" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" else echo "dry run, not putting" fi @@ -393,7 +539,10 @@ 
export_encrypted_code_signing_keys(){ # HOW TO ENCRYPT YOUR SECRET GPG KEY # You need to have a known public gpg key for this to make any sense - MAIN_GPG_KEYID=$(gpg --list-keys --keyid-format LONG "$GPG_IDENTIFIER" | head -n 2 | tail -n 1 | awk '{print $1}') + # Full primary-key fingerprint (40 hex chars) — more collision-resistant + # than the 16-char LONG key ID. Uses machine-parseable colon format so + # the extraction is stable across gpg output layout changes. + MAIN_GPG_FPR=$(gpg --list-keys --with-colons "$GPG_IDENTIFIER" | awk -F: '/^fpr/ { print $10; exit }') GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') # Careful, if you don't have a subkey, requesting it will export more than you want. # Export the main key instead (its better to have subkeys, but this is a lesser evil) @@ -404,7 +553,7 @@ export_encrypted_code_signing_keys(){ # anyway. GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') fi - echo "MAIN_GPG_KEYID = $MAIN_GPG_KEYID" + echo "MAIN_GPG_FPR = $MAIN_GPG_FPR" echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" # Only export the signing secret subkey @@ -418,9 +567,10 @@ export_encrypted_code_signing_keys(){ GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_public_gpg_key.pgp > dev/ci_public_gpg_key.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_secret_gpg_subkeys.pgp > dev/ci_secret_gpg_subkeys.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/gpg_owner_trust > dev/gpg_owner_trust.enc - echo "$MAIN_GPG_KEYID" > dev/public_gpg_key + # Store the full fingerprint as the public signer anchor + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key - # Test decrpyt + # Test decrypt GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass 
env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc @@ -434,7 +584,6 @@ export_encrypted_code_signing_keys(){ rm dev/gpg_owner_trust git status git add dev/*.enc - git add dev/gpg_owner_trust git add dev/public_gpg_key } @@ -444,6 +593,207 @@ export_encrypted_code_signing_keys(){ #} +_gpg_locate_signing_subkey(){ + __doc__=" + Internal helper. Sets MAIN_GPG_FPR and GPG_SIGN_SUBKEY in the caller's + scope. Exits non-zero and prints a diagnostic if either cannot be found. + Requires GPG_IDENTIFIER to already be set. + " + MAIN_GPG_FPR=$(gpg --list-keys --with-colons "$GPG_IDENTIFIER" \ + | awk -F: '/^fpr/ { print $10; exit }') + GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" \ + | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') + if [[ "$GPG_SIGN_SUBKEY" == "" ]]; then + echo "WARNING: no [S] subkey found for $GPG_IDENTIFIER, falling back to [C] key" >&2 + GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" \ + | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') + fi + if [[ -z "$MAIN_GPG_FPR" ]]; then + echo "ERROR: could not determine primary key fingerprint for $GPG_IDENTIFIER" >&2 + return 1 + fi + if [[ -z "$GPG_SIGN_SUBKEY" ]]; then + echo "ERROR: could not find a signing subkey for $GPG_IDENTIFIER" >&2 + return 1 + fi + echo "MAIN_GPG_FPR = $MAIN_GPG_FPR" + echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" +} + + +upload_github_gpg_secrets(){ + __doc__=" + Export GPG signing subkey material and upload it directly to GitHub + Actions as environment-scoped secrets (pypi + testpypi environments). + Also writes dev/public_gpg_key with the full primary key fingerprint + and stages it for commit. 
+ + No .enc files are written to disk or committed to git. + This implements ci_gpg_secret_transport = 'direct_ci' for GitHub. + Call this instead of export_encrypted_code_signing_keys. + " + load_secrets + source dev/secrets_configuration.sh + + local pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + local testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + + _gpg_locate_signing_subkey || return 1 + + local TMP_DIR + TMP_DIR=$(mktemp -d -t gpg-ci-XXXXXXXXXX) + # shellcheck disable=SC2064 + trap "rm -rf '$TMP_DIR'" RETURN + + # Export signing subkey secret material and associated public key + gpg --armor --export-options export-backup \ + --export-secret-subkeys "${GPG_SIGN_SUBKEY}!" > "$TMP_DIR/signing_subkey.pgp" + gpg --armor --export "${GPG_SIGN_SUBKEY}" > "$TMP_DIR/public_key.pgp" + gpg --export-ownertrust > "$TMP_DIR/owner_trust" + + # Single-line base64 for robust secret transport (tr -d '\n' is + # portable across GNU and macOS; avoids -w 0 / -b 0 divergence). + local GPG_SECRET_SIGNING_SUBKEY_B64 GPG_PUBLIC_KEY_B64 GPG_OWNER_TRUST_B64 + GPG_SECRET_SIGNING_SUBKEY_B64=$(base64 < "$TMP_DIR/signing_subkey.pgp" | tr -d '\n') + GPG_PUBLIC_KEY_B64=$(base64 < "$TMP_DIR/public_key.pgp" | tr -d '\n') + GPG_OWNER_TRUST_B64=$(base64 < "$TMP_DIR/owner_trust" | tr -d '\n') + + if [[ -z "$GPG_SECRET_SIGNING_SUBKEY_B64" ]]; then + echo "ERROR: signing subkey export is empty — aborting" >&2 + return 1 + fi + + # Write the public fingerprint anchor to the repo. + # This file is the only GPG artifact committed in direct_ci mode. + mkdir -p dev + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key + git add dev/public_gpg_key + git status + + unload_secrets + + # Ensure deployment environments exist before scoping secrets to them + setup_github_release_environments + + if ! 
gh auth status; then gh auth login; fi + + toggle_setx_enter + for env_name in "$pypi_env" "$testpypi_env"; do + upload_one_github_secret "GPG_SECRET_SIGNING_SUBKEY_B64" \ + "$GPG_SECRET_SIGNING_SUBKEY_B64" "$env_name" + upload_one_github_secret "GPG_PUBLIC_KEY_B64" \ + "$GPG_PUBLIC_KEY_B64" "$env_name" + upload_one_github_secret "GPG_OWNER_TRUST_B64" \ + "$GPG_OWNER_TRUST_B64" "$env_name" + done + toggle_setx_exit +} + + +upload_gitlab_gpg_secrets(){ + __doc__=" + Export GPG signing subkey material and upload it directly to GitLab + CI/CD project variables (protected=true, masked=true). + Also writes dev/public_gpg_key with the full primary key fingerprint + and stages it for commit. + + No .enc files are written to disk or committed to git. + This implements ci_gpg_secret_transport = 'direct_ci' for GitLab. + Call this instead of export_encrypted_code_signing_keys. + " + load_secrets + source dev/secrets_configuration.sh + + _gpg_locate_signing_subkey || return 1 + + local TMP_DIR + TMP_DIR=$(mktemp -d -t gpg-ci-XXXXXXXXXX) + # shellcheck disable=SC2064 + trap "rm -rf '$TMP_DIR'" RETURN + + gpg --armor --export-options export-backup \ + --export-secret-subkeys "${GPG_SIGN_SUBKEY}!" > "$TMP_DIR/signing_subkey.pgp" + gpg --armor --export "${GPG_SIGN_SUBKEY}" > "$TMP_DIR/public_key.pgp" + gpg --export-ownertrust > "$TMP_DIR/owner_trust" + + local GPG_SECRET_SIGNING_SUBKEY_B64 GPG_PUBLIC_KEY_B64 GPG_OWNER_TRUST_B64 + GPG_SECRET_SIGNING_SUBKEY_B64=$(base64 < "$TMP_DIR/signing_subkey.pgp" | tr -d '\n') + GPG_PUBLIC_KEY_B64=$(base64 < "$TMP_DIR/public_key.pgp" | tr -d '\n') + GPG_OWNER_TRUST_B64=$(base64 < "$TMP_DIR/owner_trust" | tr -d '\n') + + if [[ -z "$GPG_SECRET_SIGNING_SUBKEY_B64" ]]; then + echo "ERROR: signing subkey export is empty — aborting" >&2 + return 1 + fi + + # Write the public fingerprint anchor to the repo. 
+ mkdir -p dev + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key + git add dev/public_gpg_key + git status + + # Locate the GitLab project via git remote + local REMOTE=origin + local HOST + HOST=https://$(git remote get-url $REMOTE \ + | cut -d "/" -f 1 | cut -d "@" -f 2 | cut -d ":" -f 1) + local PRIVATE_GITLAB_TOKEN + PRIVATE_GITLAB_TOKEN=$(git_token_for "$HOST") + if [[ "$PRIVATE_GITLAB_TOKEN" == "ERROR" ]]; then + echo "ERROR: failed to load GitLab authentication token" >&2 + return 1 + fi + + local PROJECT_PATH + PROJECT_PATH=$(git remote get-url $REMOTE | cut -d ":" -f 2 | sed 's/\.git$//') + local PROJECT_ID + PROJECT_ID=$(curl --fail --show-error --silent --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects?search=$(basename "$PROJECT_PATH")" \ + | jq -r ".[] | select(.path_with_namespace==\"$PROJECT_PATH\") | .id") + if [[ -z "$PROJECT_ID" ]]; then + echo "ERROR: could not determine GitLab project ID for $PROJECT_PATH" >&2 + return 1 + fi + echo "PROJECT_ID = $PROJECT_ID" + + _gitlab_upsert_protected_var(){ + local key="$1" value="$2" + local existing + existing=$(curl -s --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$key" \ + | jq -r '.key // empty') + if [[ -z "$existing" ]]; then + curl --fail --silent --show-error --request POST \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables" \ + --form "key=$key" \ + --form "value=$value" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" + else + curl --fail --silent --show-error --request PUT \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$key" \ + --form "value=$value" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" + fi + } + + unload_secrets + + toggle_setx_enter + 
_gitlab_upsert_protected_var "GPG_SECRET_SIGNING_SUBKEY_B64" "$GPG_SECRET_SIGNING_SUBKEY_B64" + _gitlab_upsert_protected_var "GPG_PUBLIC_KEY_B64" "$GPG_PUBLIC_KEY_B64" + _gitlab_upsert_protected_var "GPG_OWNER_TRUST_B64" "$GPG_OWNER_TRUST_B64" + toggle_setx_exit +} + + _test_gnu(){ # shellcheck disable=SC2155 export GNUPGHOME=$(mktemp -d -t) diff --git a/docs/source/conf.py b/docs/source/conf.py index 8ca8009..f190416 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -110,7 +110,6 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) - # -- Project information ----------------------------------------------------- import sphinx_rtd_theme from os.path import exists @@ -123,23 +122,27 @@ def parse_version(fpath): Statically parse the version number from a python file """ import ast + if not exists(fpath): raise ValueError('fpath={!r} does not exist'.format(fpath)) with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) + class VersionVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: if getattr(target, 'id', None) == '__version__': self.version = node.value.s + visitor = VersionVisitor() visitor.visit(pt) return visitor.version + project = 'cmd_queue' -copyright = '2026, Kitware Inc. Jon Crall' -author = 'Kitware Inc. 
Jon Crall' +copyright = '2026, Kitware Inc., Jon Crall' +author = 'Kitware Inc., Jon Crall' modname = 'cmd_queue' repo_dpath = dirname(dirname(dirname(__file__))) @@ -182,8 +185,8 @@ def visit_Assign(self, node): napoleon_use_param = False napoleon_use_ivar = True -#autoapi_type = 'python' -#autoapi_dirs = [mod_dpath] +# autoapi_type = 'python' +# autoapi_dirs = [mod_dpath] autodoc_inherit_docstrings = False @@ -198,7 +201,8 @@ def visit_Assign(self, node): ] autodoc_default_options = { # Document callable classes - 'special-members': '__call__'} + 'special-members': '__call__' +} autodoc_member_order = 'bysource' autoclass_content = 'both' @@ -233,16 +237,13 @@ def visit_Assign(self, node): 'networkx': ('https://networkx.org/documentation/stable/', None), 'scriptconfig': ('https://scriptconfig.readthedocs.io/en/latest/', None), 'rich': ('https://rich.readthedocs.io/en/latest/', None), - 'numpy': ('https://numpy.org/doc/stable/', None), 'sympy': ('https://docs.sympy.org/latest/', None), 'scikit-learn': ('https://scikit-learn.org/stable/', None), 'pandas': ('https://pandas.pydata.org/docs/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), - 'pytest': ('https://docs.pytest.org/en/latest/', None), 'platformdirs': ('https://platformdirs.readthedocs.io/en/latest/', None), - 'timerit': ('https://timerit.readthedocs.io/en/latest/', None), 'progiter': ('https://progiter.readthedocs.io/en/latest/', None), 'dateutil': ('https://dateutil.readthedocs.io/en/latest/', None), @@ -358,15 +359,12 @@ def visit_Assign(self, node): # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -376,8 +374,13 @@ def visit_Assign(self, node): # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'cmd_queue.tex', 'cmd_queue Documentation', - 'Kitware Inc. Jon Crall', 'manual'), + ( + master_doc, + 'cmd_queue.tex', + 'cmd_queue Documentation', + 'Kitware Inc., Jon Crall', + 'manual', + ), ] @@ -385,10 +388,7 @@ def visit_Assign(self, node): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'cmd_queue', 'cmd_queue Documentation', - [author], 1) -] +man_pages = [(master_doc, 'cmd_queue', 'cmd_queue Documentation', [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -397,14 +397,21 @@ def visit_Assign(self, node): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'cmd_queue', 'cmd_queue Documentation', - author, 'cmd_queue', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + 'cmd_queue', + 'cmd_queue Documentation', + author, + 'cmd_queue', + 'One line description of project.', + 'Miscellaneous', + ), ] # -- Extension configuration ------------------------------------------------- from sphinx.domains.python import PythonDomain # NOQA + # from sphinx.application import Sphinx # NOQA from typing import Any, List # NOQA @@ -414,6 +421,7 @@ def visit_Assign(self, node): MAX_TIME_MINUTES = None if MAX_TIME_MINUTES: import ubelt # NOQA + TIMER = ubelt.Timer() TIMER.tic() @@ -423,7 +431,10 @@ class PatchedPythonDomain(PythonDomain): References: https://github.com/sphinx-doc/sphinx/issues/3866 """ - def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): + + def resolve_xref( + self, env, fromdocname, builder, type, target, node, contnode + ): """ Helps to resolves cross-references """ @@ -432,7 +443,8 @@ def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): if target.startswith('xdoc.'): target = 'xdoctest.' 
+ target[3] return_value = super(PatchedPythonDomain, self).resolve_xref( - env, fromdocname, builder, typ, target, node, contnode) + env, fromdocname, builder, type, target, node, contnode + ) return return_value @@ -460,6 +472,7 @@ def register_section(self, tag, alias=None): alias = [alias] if not isinstance(alias, (list, tuple, set)) else alias alias.append(tag) alias = tuple(alias) + # TODO: better tag patterns def _wrap(func): self.registry[tag] = { @@ -468,6 +481,7 @@ def _wrap(func): 'func': func, } return func + return _wrap def _register_builtins(self): @@ -485,9 +499,12 @@ def commandline(lines): new_lines.extend(lines[1:]) return new_lines - @self.register_section(tag='SpecialExample', alias=['Benchmark', 'Sympy', 'Doctest']) + @self.register_section( + tag='SpecialExample', alias=['Benchmark', 'Sympy', 'Doctest'] + ) def benchmark(lines): import textwrap + new_lines = [] tag = lines[0].replace(':', '').strip() # new_lines.append(lines[0]) # TODO: it would be nice to change the tagline. 
@@ -560,7 +577,7 @@ def process(self, lines): accum = [] def accept(): - """ called when we finish reading a section """ + """called when we finish reading a section""" if curr_mode == '__doc__': # Keep the lines as-is new_lines.extend(accum) @@ -574,7 +591,6 @@ def accept(): accum[:] = [] for line in orig_lines: - found = None for regitem in self.registry.values(): if line.startswith(regitem['alias']): @@ -604,8 +620,15 @@ def accept(): return lines - def process_docstring_callback(self, app, what_: str, name: str, obj: Any, - options: Any, lines: List[str]) -> None: + def process_docstring_callback( + self, + app, + what_: str, + name: str, + obj: Any, + options: Any, + lines: List[str], + ) -> None: """ Callback to be registered to autodoc-process-docstring @@ -634,7 +657,9 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html """ if self.debug: - print(f'ProcessDocstring: name={name}, what_={what_}, num_lines={len(lines)}') + print( + f'ProcessDocstring: name={name}, what_={what_}, num_lines={len(lines)}' + ) # print('BEFORE:') # import ubelt as ub @@ -666,9 +691,9 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, FIX_EXAMPLE_FORMATTING = 1 if FIX_EXAMPLE_FORMATTING: for idx, line in enumerate(lines): - if line == "Example:": - lines[idx] = "**Example:**" - lines.insert(idx + 1, "") + if line == 'Example:': + lines[idx] = '**Example:**' + lines.insert(idx + 1, '') REFORMAT_SECTIONS = 0 if REFORMAT_SECTIONS: @@ -710,7 +735,7 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, text = found['text'] new_lines = [] for para in text.split('\n\n'): - indent = para[:len(para) - len(para.lstrip())] + indent = para[: len(para) - len(para.lstrip())] new_paragraph = indent + paragraph(para) new_lines.append(new_paragraph) new_lines.append('') @@ -729,11 +754,13 @@ class SphinxDocstring: """ Helper to parse and modify sphinx 
docstrings """ + def __init__(docstr, lines): docstr.lines = lines # FORMAT THE RETURNS SECTION A BIT NICER import re + tag_pat = re.compile(r'^:(\w*):') directive_pat = re.compile(r'^.. (\w*)::\s*(\w*)') @@ -744,16 +771,22 @@ def __init__(docstr, lines): directive_match = directive_pat.search(line) if tag_match: tag = tag_match.groups()[0] - sphinx_parts.append({ - 'tag': tag, 'start_offset': idx, - 'type': 'tag', - }) + sphinx_parts.append( + { + 'tag': tag, + 'start_offset': idx, + 'type': 'tag', + } + ) elif directive_match: tag = directive_match.groups()[0] - sphinx_parts.append({ - 'tag': tag, 'start_offset': idx, - 'type': 'directive', - }) + sphinx_parts.append( + { + 'tag': tag, + 'start_offset': idx, + 'type': 'directive', + } + ) prev_offset = len(lines) for part in sphinx_parts[::-1]: @@ -793,6 +826,7 @@ def paragraph(text): str: the reduced text block """ import re + out = re.sub(r'\s\s*', ' ', text).strip() return out @@ -802,9 +836,12 @@ def create_doctest_figure(app, obj, name, lines): The idea is that each doctest that produces a figure should generate that and then that figure should be part of the docs. 
""" - import xdoctest import sys import types + + import xdoctest + import xdoctest.core + if isinstance(obj, types.ModuleType): module = obj else: @@ -818,14 +855,15 @@ def create_doctest_figure(app, obj, name, lines): # print(doctest.format_src()) import pathlib + # HACK: write to the srcdir doc_outdir = pathlib.Path(app.outdir) doc_srcdir = pathlib.Path(app.srcdir) doc_static_outdir = doc_outdir / '_static' doc_static_srcdir = doc_srcdir / '_static' - src_fig_dpath = (doc_static_srcdir / 'images') + src_fig_dpath = doc_static_srcdir / 'images' src_fig_dpath.mkdir(exist_ok=True, parents=True) - out_fig_dpath = (doc_static_outdir / 'images') + out_fig_dpath = doc_static_outdir / 'images' out_fig_dpath.mkdir(exist_ok=True, parents=True) # fig_dpath = (doc_outdir / 'autofigs' / name).mkdir(exist_ok=True) @@ -833,6 +871,7 @@ def create_doctest_figure(app, obj, name, lines): fig_num = 1 import kwplot + kwplot.autompl(force='agg') plt = kwplot.autoplt() @@ -843,7 +882,10 @@ def create_doctest_figure(app, obj, name, lines): # so we can get different figures. But we can hack it for now. import re - split_parts = re.split('({}\\s*\n)'.format(re.escape('.. rubric:: Example')), docstr) + + split_parts = re.split( + '({}\\s*\n)'.format(re.escape('.. rubric:: Example')), docstr + ) # split_parts = docstr.split('.. 
rubric:: Example') # import xdev @@ -853,7 +895,9 @@ def doctest_line_offsets(doctest): # Where the doctests starts and ends relative to the file start_line_offset = doctest.lineno - 1 last_part = doctest._parts[-1] - last_line_offset = start_line_offset + last_part.line_offset + last_part.n_lines - 1 + last_line_offset = ( + start_line_offset + last_part.line_offset + last_part.n_lines - 1 + ) offsets = { 'start': start_line_offset, 'end': last_line_offset, @@ -870,10 +914,14 @@ def doctest_line_offsets(doctest): for part in split_parts: num_lines = part.count('\n') - doctests = list(xdoctest.core.parse_docstr_examples( - part, modpath=modpath, callname=name, - # style='google' - )) + doctests = list( + xdoctest.core.parse_docstr_examples( + part, + modpath=modpath, + callname=name, + # style='google' + ) + ) # print(doctests) # doctests = list(xdoctest.core.parse_docstr_examples( @@ -894,6 +942,7 @@ def doctest_line_offsets(doctest): # Define dummy skipped exception if pytest is not available class Skipped(Exception): pass + try: doctest.mode = 'native' doctest.run(verbose=0, on_error='raise') @@ -913,19 +962,23 @@ class Skipped(Exception): fig_num += 1 # path_name = path_sanatize(name) path_name = (name).replace('.', '_') - fig_fpath = src_fig_dpath / f'fig_{path_name}_{fig_num:03d}.jpeg' + fig_fpath = ( + src_fig_dpath / f'fig_{path_name}_{fig_num:03d}.jpeg' + ) fig.savefig(fig_fpath) print(f'Wrote figure: {fig_fpath}') - to_insert_fpaths.append({ - 'insert_line_index': insert_line_index, - 'fpath': fig_fpath, - }) + to_insert_fpaths.append( + { + 'insert_line_index': insert_line_index, + 'fpath': fig_fpath, + } + ) for fig in figures: plt.close(fig) # kwplot.close_figures(figures) - curr_line_offset += (num_lines) + curr_line_offset += num_lines # if len(doctests) > 1: # doctests @@ -938,6 +991,7 @@ class Skipped(Exception): end_index = len(lines) # Reverse order for inserts import shutil + for info in to_insert_fpaths[::-1]: src_abs_fpath = info['fpath'] @@ 
-966,7 +1020,9 @@ class Skipped(Exception): insert_index = end_index else: raise KeyError(INSERT_AT) - lines.insert(insert_index, '.. image:: {}'.format('..' / rel_to_root_fpath)) + lines.insert( + insert_index, '.. image:: {}'.format('..' / rel_to_root_fpath) + ) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_root_fpath)) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_static_fpath)) lines.insert(insert_index, '') @@ -979,8 +1035,10 @@ def postprocess_hyperlinks(app, doctree, docname): "autodoc-process-docstring" event. """ # Your hyperlink postprocessing logic here - from docutils import nodes import pathlib + + from docutils import nodes + for node in doctree.traverse(nodes.reference): if 'refuri' in node.attributes: refuri = node.attributes['refuri'] @@ -989,13 +1047,15 @@ def postprocess_hyperlinks(app, doctree, docname): fpath = pathlib.Path(node.document['source']) parent_dpath = fpath.parent if (parent_dpath / refuri).exists(): - node.attributes['refuri'] = refuri.replace('.rst', '.html') + node.attributes['refuri'] = refuri.replace( + '.rst', '.html' + ) else: raise AssertionError def fix_rst_todo_section(lines): - new_lines = [] + # new_lines = [] for line in lines: ... ... 
@@ -1003,17 +1063,23 @@ def fix_rst_todo_section(lines): def setup(app): import sphinx - app : sphinx.application.Sphinx = app + import sphinx.application + + app: sphinx.application.Sphinx = app app.add_domain(PatchedPythonDomain, override=True) - app.connect("doctree-resolved", postprocess_hyperlinks) + app.connect('doctree-resolved', postprocess_hyperlinks) docstring_processor = GoogleStyleDocstringProcessor() # https://stackoverflow.com/questions/26534184/can-sphinx-ignore-certain-tags-in-python-docstrings - app.connect('autodoc-process-docstring', docstring_processor.process_docstring_callback) + app.connect( + 'autodoc-process-docstring', + docstring_processor.process_docstring_callback, + ) def copy(src, dst): import shutil + print(f'Copy {src} -> {dst}') assert src.exists() if not dst.parent.exists(): @@ -1024,16 +1090,17 @@ def copy(src, dst): HACK_FOR_KWCOCO = 0 if HACK_FOR_KWCOCO: import pathlib + doc_outdir = pathlib.Path(app.outdir) / 'auto' doc_srcdir = pathlib.Path(app.srcdir) / 'auto' mod_dpath = doc_srcdir / '../../../kwcoco' - src_fpath = (mod_dpath / 'coco_schema.json') + src_fpath = mod_dpath / 'coco_schema.json' copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) - src_fpath = (mod_dpath / 'coco_schema_informal.rst') + src_fpath = mod_dpath / 'coco_schema_informal.rst' copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) return app diff --git a/setup.py b/setup.py index 64c3858..a71284d 100755 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def parse_version(fpath): """ Statically parse the version number from a python file """ - value = static_parse("__version__", fpath) + value = static_parse('__version__', fpath) return value @@ -23,15 +23,15 @@ def static_parse(varname, fpath): import ast if not exists(fpath): - raise ValueError("fpath={!r} does not exist".format(fpath)) - with open(fpath, "r") as file_: + raise ValueError('fpath={!r} does not exist'.format(fpath)) + 
with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) class StaticVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: - if getattr(target, "id", None) == varname: + if getattr(target, 'id', None) == varname: try: self.static_value = node.value.value except AttributeError: @@ -44,7 +44,7 @@ def visit_Assign(self, node): except AttributeError: import warnings - value = "Unknown {}".format(varname) + value = 'Unknown {}'.format(varname) warnings.warn(value) return value @@ -57,16 +57,16 @@ def parse_description(): pandoc --from=markdown --to=rst --output=README.rst README.md python -c "import setup; print(setup.parse_description())" """ - readme_fpath = join(dirname(__file__), "README.rst") + readme_fpath = join(dirname(__file__), 'README.rst') # This breaks on pip install, so check that it exists. if exists(readme_fpath): - with open(readme_fpath, "r") as f: + with open(readme_fpath, 'r') as f: text = f.read() return text - return "" + return '' -def parse_requirements(fname="requirements.txt", versions=False): +def parse_requirements(fname='requirements.txt', versions=False): """ Parse the package dependencies listed in a requirements file but strips specific versioning information. 
@@ -85,7 +85,7 @@ def parse_requirements(fname="requirements.txt", versions=False): """ require_fpath = fname - def parse_line(line, dpath=""): + def parse_line(line, dpath=''): """ Parse information from a line in a requirements text file @@ -93,75 +93,77 @@ def parse_line(line, dpath=""): line = '-e git+https://a.com/somedep@sometag#egg=SomeDep' """ # Remove inline comments - comment_pos = line.find(" #") + comment_pos = line.find(' #') if comment_pos > -1: line = line[:comment_pos] - if line.startswith("-r "): + if line.startswith('-r '): # Allow specifying requirements in other files - target = join(dpath, line.split(" ")[1]) + target = join(dpath, line.split(' ')[1]) for info in parse_require_file(target): yield info else: # See: https://www.python.org/dev/peps/pep-0508/ - info = {"line": line} - if line.startswith("-e "): - info["package"] = line.split("#egg=")[1] + info = {'line': line} + if line.startswith('-e '): + info['package'] = line.split('#egg=')[1] else: - if "--find-links" in line: + if '--find-links' in line: # setuptools does not seem to handle find links - line = line.split("--find-links")[0] - if ";" in line: - pkgpart, platpart = line.split(";") + line = line.split('--find-links')[0] + if ';' in line: + pkgpart, platpart = line.split(';') # Handle platform specific dependencies # setuptools.readthedocs.io/en/latest/setuptools.html # #declaring-platform-specific-dependencies plat_deps = platpart.strip() - info["platform_deps"] = plat_deps + info['platform_deps'] = plat_deps else: pkgpart = line platpart = None # Remove versioning from the package - pat = "(" + "|".join([">=", "==", ">"]) + ")" + pat = '(' + '|'.join(['>=', '==', '>']) + ')' parts = re.split(pat, pkgpart, maxsplit=1) parts = [p.strip() for p in parts] - info["package"] = parts[0] + info['package'] = parts[0] if len(parts) > 1: op, rest = parts[1:] version = rest # NOQA - info["version"] = (op, version) + info['version'] = (op, version) yield info def parse_require_file(fpath): 
dpath = dirname(fpath) - with open(fpath, "r") as f: + with open(fpath, 'r') as f: for line in f.readlines(): line = line.strip() - if line and not line.startswith("#"): + if line and not line.startswith('#'): for info in parse_line(line, dpath=dpath): yield info def gen_packages_items(): if exists(require_fpath): for info in parse_require_file(require_fpath): - parts = [info["package"]] - if versions and "version" in info: - if versions == "strict": + parts = [info['package']] + if versions and 'version' in info: + if versions == 'strict': # In strict mode, we pin to the minimum version - if info["version"]: + if info['version']: # Only replace the first >= instance - verstr = "".join(info["version"]).replace(">=", "==", 1) + verstr = ''.join(info['version']).replace( + '>=', '==', 1 + ) parts.append(verstr) else: - parts.extend(info["version"]) - if not sys.version.startswith("3.4"): + parts.extend(info['version']) + if not sys.version.startswith('3.4'): # apparently package_deps are broken in 3.4 - plat_deps = info.get("platform_deps") + plat_deps = info.get('platform_deps') if plat_deps is not None: - parts.append(";" + plat_deps) - item = "".join(parts) + parts.append(';' + plat_deps) + item = ''.join(parts) if item: yield item @@ -199,67 +201,77 @@ def gen_packages_items(): # return requirements -NAME = "cmd_queue" -INIT_PATH = "cmd_queue/__init__.py" +NAME = 'cmd_queue' +INIT_PATH = 'cmd_queue/__init__.py' VERSION = parse_version(INIT_PATH) -if __name__ == "__main__": +if __name__ == '__main__': setupkw = {} - setupkw["install_requires"] = parse_requirements( - "requirements/runtime.txt", versions="loose" + setupkw['install_requires'] = parse_requirements( + 'requirements/runtime.txt', versions='loose' ) - setupkw["extras_require"] = { - "all": parse_requirements("requirements.txt", versions="loose"), - "runtime": parse_requirements("requirements/runtime.txt", versions="loose"), - "tests": parse_requirements("requirements/tests.txt", versions="loose"), - 
"optional": parse_requirements("requirements/optional.txt", versions="loose"), - "airflow": parse_requirements("requirements/airflow.txt", versions="loose"), - "docs": parse_requirements("requirements/docs.txt", versions="loose"), - "linting": parse_requirements("requirements/linting.txt", versions="loose"), - "all-strict": parse_requirements("requirements.txt", versions="strict"), - "runtime-strict": parse_requirements( - "requirements/runtime.txt", versions="strict" + setupkw['extras_require'] = { + 'all': parse_requirements('requirements.txt', versions='loose'), + 'runtime': parse_requirements( + 'requirements/runtime.txt', versions='loose' ), - "tests-strict": parse_requirements("requirements/tests.txt", versions="strict"), - "optional-strict": parse_requirements( - "requirements/optional.txt", versions="strict" + 'tests': parse_requirements('requirements/tests.txt', versions='loose'), + 'optional': parse_requirements( + 'requirements/optional.txt', versions='loose' ), - "airflow-strict": parse_requirements( - "requirements/airflow.txt", versions="strict" + 'airflow': parse_requirements( + 'requirements/airflow.txt', versions='loose' ), - "docs-strict": parse_requirements("requirements/docs.txt", versions="strict"), - "linting-strict": parse_requirements( - "requirements/linting.txt", versions="strict" + 'docs': parse_requirements('requirements/docs.txt', versions='loose'), + 'linting': parse_requirements( + 'requirements/linting.txt', versions='loose' + ), + 'all-strict': parse_requirements('requirements.txt', versions='strict'), + 'runtime-strict': parse_requirements( + 'requirements/runtime.txt', versions='strict' + ), + 'tests-strict': parse_requirements( + 'requirements/tests.txt', versions='strict' + ), + 'optional-strict': parse_requirements( + 'requirements/optional.txt', versions='strict' + ), + 'airflow-strict': parse_requirements( + 'requirements/airflow.txt', versions='strict' + ), + 'docs-strict': parse_requirements( + 'requirements/docs.txt', 
versions='strict' + ), + 'linting-strict': parse_requirements( + 'requirements/linting.txt', versions='strict' ), } - setupkw["name"] = NAME - setupkw["version"] = VERSION - setupkw["author"] = "Kitware Inc., Jon Crall" - setupkw["author_email"] = "kitware@kitware.com, jon.crall@kitware.com" - setupkw["url"] = "https://gitlab.kitware.com/computer-vision/cmd_queue" - setupkw["description"] = "The cmd_queue module for a DAG of bash commands" - setupkw["long_description"] = parse_description() - setupkw["long_description_content_type"] = "text/x-rst" - setupkw["license"] = "Apache 2" - setupkw["packages"] = find_packages(".") - setupkw["python_requires"] = ">=3.9" - setupkw["classifiers"] = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Utilities", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", + setupkw['name'] = NAME + setupkw['version'] = VERSION + setupkw['author'] = 'Kitware Inc., Jon Crall' + setupkw['author_email'] = 'kitware@kitware.com, jon.crall@kitware.com' + setupkw['url'] = 'https://gitlab.kitware.com/computer-vision/cmd_queue' + setupkw['description'] = 'The cmd_queue module for a DAG of bash commands' + setupkw['long_description'] = parse_description() + setupkw['long_description_content_type'] = 'text/x-rst' + setupkw['license'] = 'Apache 2' + setupkw['packages'] = find_packages('.') + setupkw['python_requires'] = '>=3.10' + setupkw['classifiers'] = [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: 
Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', ] - setupkw["package_data"] = {"": ["requirements/*.txt"]} - setupkw["entry_points"] = { - "console_scripts": [ - "cmd_queue = cmd_queue.__main__:main", + setupkw['package_data'] = {'': ['requirements/*.txt']} + setupkw['entry_points'] = { + 'console_scripts': [ + 'cmd_queue = cmd_queue.__main__:main', ], } setup(**setupkw) From 85df7c680d7bb7e052d4390143188d67351f6232 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:39:11 -0400 Subject: [PATCH 18/27] Add monitor to boilerplate cli --- CHANGELOG.md | 5 ++++- cmd_queue/cli_boilerplate.py | 10 ++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4121c7e..4c6fedd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## Version 0.3.0 - Unreleased +### Added: +* generalized the monitor so it can be launched in an independent process and reports errors better. + ### Fixed: * cwd will now handle failures if the directory doesnt exist in the bash queue * general improvements to bash script construction with per-job preamble commands @@ -14,7 +17,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed * deprecate `header_commands` for `preamble` -* Dropped support for 3.8 +* Dropped support for 3.8 and 3.9 * Transition from stubs to type annotations. 
diff --git a/cmd_queue/cli_boilerplate.py b/cmd_queue/cli_boilerplate.py index cf11843..4cf574d 100644 --- a/cmd_queue/cli_boilerplate.py +++ b/cmd_queue/cli_boilerplate.py @@ -99,14 +99,13 @@ >>> my_cli_main(cmdline=0, run=1, print_queue=0, print_commands=0) """ from typing import Any, Dict, Optional +import typing import scriptconfig as scfg import ubelt as ub - -__docstubs__ = """ -import cmd_queue -""" +if typing.TYPE_CHECKING: + import cmd_queue class CMDQueueConfig(scfg.DataConfig): @@ -159,6 +158,8 @@ class CMDQueueConfig(scfg.DataConfig): backend = scfg.Value('tmux', help=('The cmd_queue backend. Can be tmux, slurm, or serial'), group='cmd-queue') + monitor = scfg.Value('inline', help=('where the live status UI runs while'), group='cmd-queue', choices=['inline', 'tmux']) + queue_name = scfg.Value(None, help='overwrite the default queue name', group='cmd-queue') print_commands = scfg.Value('auto', isflag=True, help='enable / disable rprint before exec', group='cmd-queue') @@ -271,4 +272,5 @@ def run_queue( if config.run: queue.run(with_textual=config.with_textual, other_session_handler=config.other_session_handler, + monitor=config.monitor, **kwargs) From 96bb14cf14bb665a4e52a6b8885754c2c6a314f6 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:41:13 -0400 Subject: [PATCH 19/27] Use pytest 8+ everywhere --- requirements/tests.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/tests.txt b/requirements/tests.txt index 260e09f..a2eb4ae 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -2,8 +2,8 @@ pytest>=8.1.1 ; python_version < '4.0' and python_version >= '3.13' # Python 3.13+ pytest>=8.1.1 ; python_version < '3.13' and python_version >= '3.12' # Python 3.12 pytest>=8.1.1 ; python_version < '3.12' and python_version >= '3.11' # Python 3.11 -pytest>=6.2.5 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 -pytest>=6.2.5 ; python_version < '3.10' and python_version >= '3.8' 
# Python 3.8-3.9 +pytest>=8.1.1 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +pytest>=8.1.1 ; python_version < '3.10' and python_version >= '3.8' # Python 3.8-3.9 xdoctest >= 1.1.5 From 2736398f337a686feee92837a72db5a127517c59 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:47:51 -0400 Subject: [PATCH 20/27] Fix type errors --- cmd_queue/monitor_app.py | 4 ++-- cmd_queue/slurm_queue.py | 7 +++---- cmd_queue/tmux_queue.py | 4 ++-- cmd_queue/util/textual_extensions.py | 10 +++++----- cmd_queue/util/util_algo.py | 3 ++- cmd_queue/util/util_yaml.py | 12 ++++++------ 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index 46639d9..f75ca21 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -55,7 +55,7 @@ def render(self) -> Any: return table -class CmdQueueMonitorApp(InstanceRunnableApp): +class CmdQueueMonitorApp(InstanceRunnableApp): # type: ignore """ A Textual App to monitor jobs """ @@ -121,7 +121,7 @@ async def action_quit(self) -> None: async def on_mount(self, event: Any) -> None: # from textual.layouts.vertical import VerticalLayout - view: DockView = await self.push_view(DockView()) + view: DockView = await self.push_view(DockView()) # type: ignore header = ExtHeader(tall=False) footer = textual.widgets.Footer() # panel = rich.panel.Panel() diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 6dd4094..41ce50f 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -1,6 +1,4 @@ from __future__ import annotations -# mypy: ignore-errors - r""" Work in progress. 
The idea is to provide a TMUX queue and a SLURM queue that provide a common high level API, even though functionality might diverge, the @@ -246,7 +244,7 @@ def __init__( begin: Optional[Any] = None, shell: Optional[Any] = None, tags: Optional[Any] = None, - preamble: Optional[List[str]] = None, + preamble: List[str] | str | None = None, **kwargs: Any, ) -> None: super().__init__() @@ -254,7 +252,7 @@ def __init__( import uuid name = 'job-' + str(uuid.uuid4()) if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.unused_kwargs = kwargs self.command = command self.name = name @@ -855,6 +853,7 @@ class UnableToMonitor(Exception): def update_jobid_status(): import rich + assert job_status_table is not None for row in job_status_table: if row['needs_update']: job_id = row['job_id'] diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 3386587..fedd89d 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -885,7 +885,7 @@ def serial_run(self) -> None: def monitor( self, refresh_rate: float = 0.4, - with_textual: str = 'auto', + with_textual: str | bool = 'auto', onfail: str = '', onexit: str = '', ) -> None: @@ -1393,7 +1393,7 @@ def has_stdin() -> bool: if not hasattr(CmdQueueMonitorApp, 'run'): raise ImportError('Current textual monitor is broken on new versions') except ImportError: - CmdQueueMonitorApp = None + CmdQueueMonitorApp = None # type: ignore if 0: diff --git a/cmd_queue/util/textual_extensions.py b/cmd_queue/util/textual_extensions.py index 7d08690..59a245c 100644 --- a/cmd_queue/util/textual_extensions.py +++ b/cmd_queue/util/textual_extensions.py @@ -47,12 +47,12 @@ class class_or_instancemethod(classmethod): >>> print(X().foo()) bound to the instance """ - def __get__(self, instance: Any, type_: Any) -> Any: - descr_get = super().__get__ if instance is None else self.__func__.__get__ + def __get__(self, instance: Any, type_: Any) -> Any: # type: ignore + 
descr_get = super().__get__ if instance is None else self.__func__.__get__ # type: ignore return descr_get(instance, type_) -class InstanceRunnableApp(App): +class InstanceRunnableApp(App): # type: ignore """ Extension of App that allows for running an instance @@ -155,7 +155,7 @@ def run( try: - class ExtHeader(Widget): + class ExtHeader(Widget): # type: ignore """ """ def __init__( @@ -229,4 +229,4 @@ async def on_click(self, event: Any) -> None: """ self.tall = not self.tall except Exception: - ExtHeader = None + ExtHeader = None # type: ignore diff --git a/cmd_queue/util/util_algo.py b/cmd_queue/util/util_algo.py index 8e7a802..4637fd7 100644 --- a/cmd_queue/util/util_algo.py +++ b/cmd_queue/util/util_algo.py @@ -1,11 +1,12 @@ from __future__ import annotations from typing import List +from collections.abc import Sequence import numpy as np -def balanced_number_partitioning(items: np.ndarray, num_parts: int) -> List[np.ndarray]: +def balanced_number_partitioning(items: np.ndarray | Sequence, num_parts: int) -> List[np.ndarray]: """ Greedy approximation to multiway number partitioning diff --git a/cmd_queue/util/util_yaml.py b/cmd_queue/util/util_yaml.py index 265f9c0..e229a61 100644 --- a/cmd_queue/util/util_yaml.py +++ b/cmd_queue/util/util_yaml.py @@ -104,10 +104,10 @@ def _custom_new_ruaml_yaml_obj(): import ruamel.yaml # make a new instance, although you could get the YAML # instance from the constructor argument - class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): + class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): # type: ignore ... - class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): + class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): # type: ignore ... 
CustomRepresenter.add_representer(str, _YamlRepresenter.str_presenter) @@ -139,7 +139,7 @@ def _construct_include_tag(self, node): yaml_obj.Constructor = CustomConstructor yaml_obj.Representer = CustomRepresenter yaml_obj.preserve_quotes = True - yaml_obj.width = float('inf') + yaml_obj.width = float('inf') # type: ignore return yaml_obj @@ -389,7 +389,7 @@ def InlineList(items): .. [SO56937691] https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline """ import ruamel.yaml - ret = ruamel.yaml.comments.CommentedSeq(items) + ret = ruamel.yaml.comments.CommentedSeq(items) # type: ignore ret.fa.set_flow_style() return ret @@ -408,10 +408,10 @@ def Dict(data): >>> print(Yaml.dumps(data)) """ import ruamel.yaml - ret = ruamel.yaml.comments.CommentedMap(data) + ret = ruamel.yaml.comments.CommentedMap(data) # type: ignore return ret @staticmethod def CodeBlock(text): import ruamel.yaml - return ruamel.yaml.scalarstring.LiteralScalarString(ub.codeblock(text)) + return ruamel.yaml.scalarstring.LiteralScalarString(ub.codeblock(text)) # type: ignore From b1c3869708319ab4ae14766c75a085b7f7670fa5 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:48:02 -0400 Subject: [PATCH 21/27] Fix type errors --- cmd_queue/serial_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index 2b71619..0657948 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -717,7 +717,7 @@ def add_header_command(self, command: str) -> None: migration='use preamble kwarg or add_preamble_command instead', deprecate='now', ) - self.add_preamble_command.append(command) + self.add_preamble_command(command) def add_preamble_command(self, command: Any) -> None: if isinstance(command, list): From 18b2b03623f2b97f8d2ada3e55885238e7788fc6 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:56:55 -0400 Subject: [PATCH 22/27] Fix types --- 
cmd_queue/airflow_queue.py | 9 +++++---- cmd_queue/base_queue.py | 12 ++++++++---- cmd_queue/monitor_app.py | 20 +++++++++----------- cmd_queue/serial_queue.py | 7 ++++--- cmd_queue/slurm_queue.py | 3 +++ tests/test_bash_job_errors.py | 6 +++--- 6 files changed, 32 insertions(+), 25 deletions(-) diff --git a/cmd_queue/airflow_queue.py b/cmd_queue/airflow_queue.py index e2280ed..cc04441 100644 --- a/cmd_queue/airflow_queue.py +++ b/cmd_queue/airflow_queue.py @@ -32,7 +32,7 @@ import os import time import uuid -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Iterable, List, Optional import ubelt as ub @@ -61,7 +61,7 @@ def __init__( if name is None: name = 'job-' + str(uuid.uuid4()) if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.unused_kwargs = kwargs self.command = command self.name = name @@ -465,7 +465,8 @@ def demo() -> None: from datetime import timezone from datetime import datetime as datetime_cls from airflow.operators.bash import BashOperator - now = datetime_cls.utcnow().replace(tzinfo=timezone.utc) + now = datetime_cls.now(timezone.utc) + # now = datetime_cls.utcnow().replace(tzinfo=timezone.utc) dag = DAG( 'mycustomdag', start_date=now, @@ -475,7 +476,7 @@ def demo() -> None: t1 = BashOperator(task_id='task1', bash_command='date', dag=dag) t2 = BashOperator(task_id='task2', bash_command='echo hi 1 && true', dag=dag) t2.set_upstream(t1) - dag.run(verbose=True, local=True) + dag.run(verbose=True, local=True) # type: ignore if __name__ == '__main__': diff --git a/cmd_queue/base_queue.py b/cmd_queue/base_queue.py index e7af2b8..3306da2 100644 --- a/cmd_queue/base_queue.py +++ b/cmd_queue/base_queue.py @@ -27,7 +27,7 @@ def __init__( ) -> None: # This is unused, should the slurm and bash job reuse this? 
if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.name = name self.command = command self.depends = depends @@ -279,7 +279,7 @@ def write_network_text( try: import rich as rich_mod except ImportError: - rich_mod = None + rich_mod = None # type: ignore if rich == 'auto': rich = rich_mod is not None @@ -429,14 +429,18 @@ def _dependency_graph(self) -> Any: graph.add_edge(dep.name, job.name) return graph - def monitor(self) -> None: + def monitor(self, + refresh_rate: float = 0.4, + with_textual: str | bool = 'auto', + onfail: str = '', + onexit: str = '') -> None: print('monitor not implemented') def _coerce_style( self, style: str = 'auto', with_rich: Optional[bool] = None, - colors: bool = True, + colors: bool | int = True, ) -> str: # Helper if with_rich is not None: diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index f75ca21..c7d95d4 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -1,6 +1,4 @@ from __future__ import annotations -# mypy: ignore-errors - from types import ModuleType from typing import Any, Callable, Optional, Tuple @@ -18,17 +16,17 @@ from cmd_queue.util import texter as textual # import ubelt as ub except ImportError: - rich: ModuleType = None - textual: ModuleType = None - events: ModuleType = None - ScrollView: type = object - Widget: type = object - DockView: type = object - InstanceRunnableApp: type = object - ExtHeader: type = object + rich: ModuleType = None # type: ignore + textual: ModuleType = None # type: ignore + events: ModuleType = None # type: ignore + ScrollView: type = object # type: ignore + Widget: type = object # type: ignore + DockView: type = object # type: ignore + InstanceRunnableApp: type = object # type: ignore + ExtHeader: type = object # type: ignore -class JobTable(Widget): +class JobTable(Widget): # type: ignore def __init__( self, diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index 
0657948..3d87b9d 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -105,8 +105,9 @@ def __init__( ) -> None: if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.name = name + assert self.name is not None self.pathid = self.name + '_' + ub.hash_data(uuid.uuid4())[0:8] self.kwargs = kwargs # unused kwargs self.cwd = cwd @@ -186,7 +187,7 @@ def finalize_text( v2 = conditionals.get(k) if not ub.iterable(v2): v2 = [v2] - v.extend(v2) + v.extend(v2) # type: ignore if with_status: prefix_script.append('# Ensure job status directory') @@ -709,7 +710,7 @@ def _command_exit(): text = '\n'.join(script) return text - def add_header_command(self, command: str) -> None: + def add_header_command(self, command: str) -> None: # type: ignore ub.schedule_deprecation( modname='cmd_queue', name='add_header_command', diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 41ce50f..faedfce 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -783,6 +783,9 @@ def _is_finished() -> bool: def monitor( self, refresh_rate: float = 0.4, + # TODO: use or document as unused or make the signature sane across + # clsses + with_textual: str | bool = 'auto', onfail: str = '', onexit: str = '', ) -> Optional[Any]: diff --git a/tests/test_bash_job_errors.py b/tests/test_bash_job_errors.py index 930f33d..14ce711 100644 --- a/tests/test_bash_job_errors.py +++ b/tests/test_bash_job_errors.py @@ -48,10 +48,10 @@ def test_bash_job_errors(): pyexe = sys.executable self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=True) - self.print_commands(1, 1) + self.print_commands(True, True) self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=False) - self.print_commands(1, 1) + self.print_commands(True, True) def test_tmux_queue_errors(): @@ -71,7 +71,7 @@ def test_tmux_queue_errors(): job3 = queue.submit(f'{pyexe} {script_fpath} --steps=2 
--steptime=0.5', log=log, depends=job2) job4 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log) # queue.submit(f'{pyexe} {script_fpath} --steps=2', log=log) - queue.print_commands(1, 1) + queue.print_commands(True, True) queue.write() if not queue.is_available(): From 96565f01377b0889da1d0915fe4bf2f4e9d86cad Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:57:40 -0400 Subject: [PATCH 23/27] Ruff format --- cmd_queue/__init__.py | 4 +- cmd_queue/airflow_queue.py | 88 +++++--- cmd_queue/base_queue.py | 113 +++++++--- cmd_queue/cli_boilerplate.py | 138 ++++++++---- cmd_queue/main.py | 151 +++++++++---- cmd_queue/monitor_app.py | 10 +- cmd_queue/monitor_manifest.py | 2 + cmd_queue/serial_queue.py | 151 +++++++++---- cmd_queue/slurm_queue.py | 176 +++++++++++----- cmd_queue/slurmify.py | 36 +++- cmd_queue/tmux_queue.py | 304 ++++++++++++++++++--------- cmd_queue/util/__init__.py | 16 +- cmd_queue/util/richer.py | 88 ++++++-- cmd_queue/util/texter.py | 45 +++- cmd_queue/util/textual_extensions.py | 59 ++++-- cmd_queue/util/util_algo.py | 4 +- cmd_queue/util/util_bash.py | 12 +- cmd_queue/util/util_tags.py | 1 + cmd_queue/util/util_tmux.py | 152 +++++++++----- cmd_queue/util/util_yaml.py | 48 ++++- dev/_devcheck_rich.py | 8 +- examples/slurm_example.py | 36 ++-- examples/tmux_example.py | 73 ++++--- run_tests.py | 13 +- tests/test_airflow_queue.py | 10 +- tests/test_bash_job_errors.py | 44 +++- tests/test_bash_variants.py | 270 +++++++++++++++--------- tests/test_cli.py | 13 +- tests/test_errors.py | 2 +- tests/test_import.py | 1 + tests/test_slurm_variants.py | 32 +-- tests/tests_mixed_hardware_tmux.py | 56 ++++- 32 files changed, 1502 insertions(+), 654 deletions(-) diff --git a/cmd_queue/__init__.py b/cmd_queue/__init__.py index 23987ae..30267dd 100644 --- a/cmd_queue/__init__.py +++ b/cmd_queue/__init__.py @@ -314,6 +314,8 @@ } from cmd_queue import base_queue -from cmd_queue.base_queue import (Queue,) +from 
cmd_queue.base_queue import ( + Queue, +) __all__ = ['Queue', 'base_queue'] diff --git a/cmd_queue/airflow_queue.py b/cmd_queue/airflow_queue.py index cc04441..7862d33 100644 --- a/cmd_queue/airflow_queue.py +++ b/cmd_queue/airflow_queue.py @@ -43,6 +43,7 @@ class AirflowJob(base_queue.Job): """ Represents a airflow job that hasn't been executed yet """ + def __init__( self, command: str, @@ -132,8 +133,14 @@ def __init__( self.name = name stamp = time.strftime('%Y%m%dT%H%M%S') self.unused_kwargs = kwargs - self.queue_id = name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] - base_dpath = ub.Path(dpath) if dpath is not None else ub.Path.appdir('cmd_queue') / 'airflow' + self.queue_id = ( + name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] + ) + base_dpath = ( + ub.Path(dpath) + if dpath is not None + else ub.Path.appdir('cmd_queue') / 'airflow' + ) self.dpath = (base_dpath / self.queue_id).ensuredir() self.dags_dpath = (self.dpath / 'dags').ensuredir() self.log_dpath = (self.dpath / 'logs').ensuredir() @@ -142,7 +149,11 @@ def __init__( self.preamble = [] self.all_depends = None self.job_info_dpath = self.dpath / 'job_info' - home = ub.Path(airflow_home) if airflow_home is not None else (self.dpath / 'airflow_home') + home = ( + ub.Path(airflow_home) + if airflow_home is not None + else (self.dpath / 'airflow_home') + ) self.airflow_home = home.ensuredir() if preamble is not None: self.add_preamble_command(preamble) @@ -188,7 +199,7 @@ def _airflow_env(self): env['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False' env.setdefault( 'AIRFLOW__DATABASE__SQL_ALCHEMY_CONN', - f"sqlite:///{self.airflow_home / 'airflow.db'}", + f'sqlite:///{self.airflow_home / "airflow.db"}', ) return env @@ -213,7 +224,9 @@ def run(self, block: bool = True, system: bool = False) -> None: env = self._airflow_env() detach = not block if detach: - raise NotImplementedError('Non-blocking airflow runs are not implemented yet') + raise NotImplementedError( + 'Non-blocking airflow runs are 
not implemented yet' + ) with self._patched_env(env): from airflow.utils import db from airflow.models.dagbag import DagBag @@ -223,6 +236,7 @@ def run(self, block: bool = True, system: bool = False) -> None: from airflow.utils.session import create_session import sys import contextlib + if hasattr(db, 'resetdb'): db.resetdb() elif hasattr(db, 'check_and_run_migrations'): @@ -231,10 +245,16 @@ def run(self, block: bool = True, system: bool = False) -> None: db.upgradedb() else: db.initdb() - dag_bag = DagBag(dag_folder=os.fspath(self.dags_dpath), include_examples=False, safe_mode=False) + dag_bag = DagBag( + dag_folder=os.fspath(self.dags_dpath), + include_examples=False, + safe_mode=False, + ) dag = dag_bag.get_dag(self.name) if dag is None: - raise RuntimeError(f'Could not load DAG {self.name} from {self.dags_dpath}') + raise RuntimeError( + f'Could not load DAG {self.name} from {self.dags_dpath}' + ) # Airflow 3 requires DAG bundle versioning unless explicitly disabled. if not getattr(dag, 'disable_bundle_versioning', False): dag.disable_bundle_versioning = True @@ -245,7 +265,11 @@ def run(self, block: bool = True, system: bool = False) -> None: session.flush() dag_model = session.get(DagModel, dag.dag_id) if dag_model is None: - dag_model = DagModel(dag_id=dag.dag_id, fileloc=dag.fileloc, bundle_name=bundle_name) + dag_model = DagModel( + dag_id=dag.dag_id, + fileloc=dag.fileloc, + bundle_name=bundle_name, + ) else: dag_model.fileloc = dag.fileloc dag_model.bundle_name = bundle_name @@ -256,7 +280,10 @@ def run(self, block: bool = True, system: bool = False) -> None: # be closed unexpectedly. Ensure Airflow writes to the real stdout/ # stderr streams to avoid "I/O operation on closed file" errors # during tests. 
- with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__): + with ( + contextlib.redirect_stdout(sys.__stdout__), + contextlib.redirect_stderr(sys.__stderr__), + ): dag.test() def read_state(self): @@ -272,13 +299,16 @@ def read_state(self): from airflow.models.dagrun import DagRun from airflow.models.taskinstance import TaskInstance from sqlalchemy import select + try: from airflow.utils.state import TaskInstanceState + success_state = TaskInstanceState.SUCCESS failed_state = TaskInstanceState.FAILED skipped_state = TaskInstanceState.SKIPPED except Exception: # pragma: no cover from airflow.utils.state import State as TaskInstanceState # type: ignore + success_state = TaskInstanceState.SUCCESS failed_state = TaskInstanceState.FAILED skipped_state = TaskInstanceState.SKIPPED @@ -311,23 +341,22 @@ def read_state(self): summary['status'] = getattr(dagrun, 'state', 'unknown') summary['run_id'] = dagrun.run_id - ti_stmt = ( - select(TaskInstance.state) - .where( - TaskInstance.dag_id == dagrun.dag_id, - TaskInstance.run_id == dagrun.run_id, - ) + ti_stmt = select(TaskInstance.state).where( + TaskInstance.dag_id == dagrun.dag_id, + TaskInstance.run_id == dagrun.run_id, ) states = list(session.scalars(ti_stmt)) passed = sum(state == success_state for state in states) failed = sum(state == failed_state for state in states) skipped = sum(state == skipped_state for state in states) - summary.update({ - 'total': len(states), - 'passed': passed, - 'failed': failed, - 'skipped': skipped, - }) + summary.update( + { + 'total': len(states), + 'passed': passed, + 'failed': failed, + 'skipped': skipped, + } + ) return summary def finalize_text(self) -> str: @@ -337,7 +366,7 @@ def finalize_text(self) -> str: topo_jobs = [self.named_jobs[n] for n in nx.topological_sort(graph)] header = ub.codeblock( - f''' + f""" from airflow import DAG from datetime import timezone from datetime import datetime as datetime_cls @@ -350,7 +379,7 @@ def 
finalize_text(self) -> str: tags=['cmd_queue'], ) jobs = dict() - ''' + """ ) parts = [header] for job in topo_jobs: @@ -359,7 +388,9 @@ def finalize_text(self) -> str: for job in topo_jobs: for dep in job.depends or []: if dep is not None: - parts.append(f'jobs[{job.name!r}].set_upstream(jobs[{dep.name!r}])') + parts.append( + f'jobs[{job.name!r}].set_upstream(jobs[{dep.name!r}])' + ) # if depends: # for dep in depends: @@ -398,7 +429,8 @@ def submit(self, command: str, **kwargs: Any) -> AirflowJob: depends = [depends] depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] job = AirflowJob(command, depends=depends, **kwargs) self.jobs.append(job) self.num_real_jobs += 1 @@ -435,6 +467,7 @@ def print_commands( from rich.panel import Panel from rich.syntax import Syntax from rich.console import Console + console = Console() console.print(Panel(Syntax(code, 'python'), title=str(self.fpath))) # console.print(Syntax(code, 'bash')) @@ -465,6 +498,7 @@ def demo() -> None: from datetime import timezone from datetime import datetime as datetime_cls from airflow.operators.bash import BashOperator + now = datetime_cls.now(timezone.utc) # now = datetime_cls.utcnow().replace(tzinfo=timezone.utc) dag = DAG( @@ -474,7 +508,9 @@ def demo() -> None: tags=['example'], ) t1 = BashOperator(task_id='task1', bash_command='date', dag=dag) - t2 = BashOperator(task_id='task2', bash_command='echo hi 1 && true', dag=dag) + t2 = BashOperator( + task_id='task2', bash_command='echo hi 1 && true', dag=dag + ) t2.set_upstream(t1) dag.run(verbose=True, local=True) # type: ignore diff --git a/cmd_queue/base_queue.py b/cmd_queue/base_queue.py index 3306da2..f67757e 100644 --- a/cmd_queue/base_queue.py +++ b/cmd_queue/base_queue.py @@ -6,18 +6,17 @@ import ubelt as ub -class DuplicateJobError(KeyError): - ... +class DuplicateJobError(KeyError): ... -class UnknownBackendError(KeyError): - ... +class UnknownBackendError(KeyError): ... 
class Job(ub.NiceRepr): """ Base class for a job """ + def __init__( self, command: Optional[str] = None, @@ -130,7 +129,9 @@ def sync(self) -> Queue: """ graph = self._dependency_graph() # Find the jobs that nobody depends on - sink_jobs = [graph.nodes[n]['job'] for n, d in graph.out_degree if d == 0] + sink_jobs = [ + graph.nodes[n]['job'] for n, d in graph.out_degree if d == 0 + ] # All new jobs must depend on these jobs self.all_depends = sink_jobs return self @@ -142,12 +143,21 @@ def write(self) -> Any: """ import os import stat + text = self.finalize_text() self.fpath.parent.ensuredir() self.fpath.write_text(text) - os.chmod(self.fpath, ( - stat.S_IXUSR | stat.S_IXGRP | stat.S_IRUSR | - stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)) + os.chmod( + self.fpath, + ( + stat.S_IXUSR + | stat.S_IXGRP + | stat.S_IRUSR + | stat.S_IWUSR + | stat.S_IRGRP + | stat.S_IWGRP + ), + ) return self.fpath def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: @@ -170,7 +180,9 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: if isinstance(command, str): name = kwargs.get('name', None) if name is None: - name = kwargs['name'] = self.name + '-job-{}'.format(self.num_real_jobs) + name = kwargs['name'] = self.name + '-job-{}'.format( + self.num_real_jobs + ) # TODO: make sure name is path safe. 
if ':' in name: @@ -193,9 +205,14 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: try: depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] except Exception: - print('self.named_jobs = {}'.format(ub.urepr(self.named_jobs, nl=1))) + print( + 'self.named_jobs = {}'.format( + ub.urepr(self.named_jobs, nl=1) + ) + ) raise job = serial_queue.BashJob(command, depends=depends, **kwargs) elif isinstance(command, Job): @@ -223,6 +240,7 @@ def _backend_classes(cls): from cmd_queue import serial_queue from cmd_queue import slurm_queue from cmd_queue import airflow_queue + lut = { 'serial': serial_queue.SerialQueue, 'tmux': tmux_queue.TMUXMultiQueue, @@ -252,17 +270,21 @@ def create(cls, backend: str = 'serial', **kwargs: Any) -> Queue: """ if backend == 'serial': from cmd_queue import serial_queue + kwargs.pop('size', None) self = serial_queue.SerialQueue(**kwargs) elif backend == 'tmux': from cmd_queue import tmux_queue + self = tmux_queue.TMUXMultiQueue(**kwargs) elif backend == 'slurm': from cmd_queue import slurm_queue + kwargs.pop('size', None) self = slurm_queue.SlurmQueue(**kwargs) elif backend == 'airflow': from cmd_queue import airflow_queue + kwargs.pop('size', None) self = airflow_queue.AirflowQueue(**kwargs) else: @@ -289,20 +311,26 @@ def write_network_text( print_ = print import networkx as nx + graph = self._dependency_graph() if reduced: print_('\nGraph (reduced):') try: reduced_graph = nx.transitive_reduction(graph) - nx.write_network_text(reduced_graph, path=print_, end='', - vertical_chains=vertical_chains) + nx.write_network_text( + reduced_graph, + path=print_, + end='', + vertical_chains=vertical_chains, + ) except Exception as ex: print_(f'ex={ex}') print_('\n') else: print_('\nGraph:') - nx.write_network_text(graph, path=print_, end='', - vertical_chains=vertical_chains) + nx.write_network_text( + graph, path=print_, end='', vertical_chains=vertical_chains + ) def 
print_commands( self, @@ -340,17 +368,23 @@ def print_commands( colors = kwargs.get('colors', None) if colors is not None: ub.schedule_deprecation( - 'cmd_queue', 'colors', 'arg', + 'cmd_queue', + 'colors', + 'arg', migration='use style="plain" | "rich" | "colors" instead', - deprecate='now') + deprecate='now', + ) if not colors: style = 'plain' with_rich = kwargs.get('with_rich', None) if with_rich is not None: ub.schedule_deprecation( - 'cmd_queue', 'with_rich', 'arg', + 'cmd_queue', + 'with_rich', + 'arg', migration='use use style="plain" | "rich" | "colors" instead', - deprecate='now') + deprecate='now', + ) if with_rich: style = 'rich' if style == 'auto': @@ -358,16 +392,19 @@ def print_commands( # style = 'rich' if colors else 'plain' from cmd_queue.util import util_tags + exclude_tags = util_tags.Tags.coerce(exclude_tags) code = self.finalize_text( with_status=with_status, with_gaurds=with_gaurds, with_locks=with_locks, - exclude_tags=exclude_tags) + exclude_tags=exclude_tags, + ) if style == 'rich': from rich.syntax import Syntax from rich.panel import Panel from rich.console import Console + console = Console() console.print(Panel(Syntax(code, 'bash'), title=str(self.fpath))) elif style == 'colors': @@ -381,19 +418,25 @@ def print_commands( def rprint(self, **kwargs: Any) -> None: ub.schedule_deprecation( - 'cmd_queue', name='rprint', type='arg', + 'cmd_queue', + name='rprint', + type='arg', migration='print_commands', ) self.print_commands(**kwargs) - def print_graph(self, reduced: bool = True, vertical_chains: bool = False) -> None: + def print_graph( + self, reduced: bool = True, vertical_chains: bool = False + ) -> None: """ Renders the dependency graph to an "network text" Args: reduced (bool): if True only show the implicit dependency forest """ - self.write_network_text(reduced=reduced, vertical_chains=vertical_chains) + self.write_network_text( + reduced=reduced, vertical_chains=vertical_chains + ) def _dependency_graph(self) -> Any: """ @@ -414,10 
+457,13 @@ def _dependency_graph(self) -> Any: >>> self.print_graph() """ import networkx as nx + graph = nx.DiGraph() duplicate_names = ub.find_duplicates(self.jobs, key=lambda x: x.name) if duplicate_names: - print('duplicate_names = {}'.format(ub.urepr(duplicate_names, nl=1))) + print( + 'duplicate_names = {}'.format(ub.urepr(duplicate_names, nl=1)) + ) raise Exception('Job names must be unique') for index, job in enumerate(self.jobs): @@ -429,11 +475,13 @@ def _dependency_graph(self) -> Any: graph.add_edge(dep.name, job.name) return graph - def monitor(self, - refresh_rate: float = 0.4, - with_textual: str | bool = 'auto', - onfail: str = '', - onexit: str = '') -> None: + def monitor( + self, + refresh_rate: float = 0.4, + with_textual: str | bool = 'auto', + onfail: str = '', + onexit: str = '', + ) -> None: print('monitor not implemented') def _coerce_style( @@ -445,8 +493,11 @@ def _coerce_style( # Helper if with_rich is not None: ub.schedule_deprecation( - 'cmd_queue', 'with_rich', 'arg', - migration='use style="rich" instead') + 'cmd_queue', + 'with_rich', + 'arg', + migration='use style="rich" instead', + ) if with_rich: style = 'rich' if style == 'auto': diff --git a/cmd_queue/cli_boilerplate.py b/cmd_queue/cli_boilerplate.py index 4cf574d..7c27667 100644 --- a/cmd_queue/cli_boilerplate.py +++ b/cmd_queue/cli_boilerplate.py @@ -154,42 +154,91 @@ class CMDQueueConfig(scfg.DataConfig): slurm_options = scfg.Value(None, help='if the backend is slurm, provide a YAML dictionary for things like partition / etc...', group='cmd-queue') """ - run = scfg.Value(False, isflag=True, help='if False, only prints the commands, otherwise executes them', group='cmd-queue') - backend = scfg.Value('tmux', help=('The cmd_queue backend. 
Can be tmux, slurm, or serial'), group='cmd-queue') - - monitor = scfg.Value('inline', help=('where the live status UI runs while'), group='cmd-queue', choices=['inline', 'tmux']) - - queue_name = scfg.Value(None, help='overwrite the default queue name', group='cmd-queue') - - print_commands = scfg.Value('auto', isflag=True, help='enable / disable rprint before exec', group='cmd-queue') - - print_queue = scfg.Value('auto', isflag=True, help='print the cmd queue DAG', group='cmd-queue') - - with_textual = scfg.Value('auto', isflag=True, help='setting for cmd-queue monitoring', group='cmd-queue') - - other_session_handler = scfg.Value('ask', help='for tmux backend only. How to handle conflicting sessions. Can be ask, kill, or ignore, or auto', group='cmd-queue') - - virtualenv_cmd = scfg.Value(None, type=str, help=ub.paragraph( - ''' + run = scfg.Value( + False, + isflag=True, + help='if False, only prints the commands, otherwise executes them', + group='cmd-queue', + ) + + backend = scfg.Value( + 'tmux', + help=('The cmd_queue backend. Can be tmux, slurm, or serial'), + group='cmd-queue', + ) + + monitor = scfg.Value( + 'inline', + help=('where the live status UI runs while'), + group='cmd-queue', + choices=['inline', 'tmux'], + ) + + queue_name = scfg.Value( + None, help='overwrite the default queue name', group='cmd-queue' + ) + + print_commands = scfg.Value( + 'auto', + isflag=True, + help='enable / disable rprint before exec', + group='cmd-queue', + ) + + print_queue = scfg.Value( + 'auto', isflag=True, help='print the cmd queue DAG', group='cmd-queue' + ) + + with_textual = scfg.Value( + 'auto', + isflag=True, + help='setting for cmd-queue monitoring', + group='cmd-queue', + ) + + other_session_handler = scfg.Value( + 'ask', + help='for tmux backend only. How to handle conflicting sessions. 
Can be ask, kill, or ignore, or auto', + group='cmd-queue', + ) + + virtualenv_cmd = scfg.Value( + None, + type=str, + help=ub.paragraph( + """ Command to start the appropriate virtual environment if your bashrc - does not start it by default.'''), group='cmd-queue') + does not start it by default.""" + ), + group='cmd-queue', + ) # TODO: add global preamble argument - tmux_workers = scfg.Value(8, help='number of tmux workers in the queue for the tmux backend', group='cmd-queue') + tmux_workers = scfg.Value( + 8, + help='number of tmux workers in the queue for the tmux backend', + group='cmd-queue', + ) - slurm_options = scfg.Value(None, help=ub.paragraph( - ''' + slurm_options = scfg.Value( + None, + help=ub.paragraph( + """ if the backend is slurm, provide a YAML dictionary for things like partition / etc... - '''), group='cmd-queue') + """ + ), + group='cmd-queue', + ) def __post_init__(self) -> None: from cmd_queue.util.util_yaml import Yaml + self.slurm_options = Yaml.coerce(self.slurm_options) or {} - def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": + def create_queue(config, **kwargs: Any) -> 'cmd_queue.Queue': """ Create an empty queue based on options specified in this config @@ -200,19 +249,20 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": cmd_queue.Queue """ import cmd_queue + queuekw = {} if config.backend == 'slurm': queuekw.update(config.slurm_options) elif config.backend == 'tmux': - queuekw.update({ - 'size': config.tmux_workers, - }) + queuekw.update( + { + 'size': config.tmux_workers, + } + ) queuekw.update(kwargs) if 'name' not in queuekw: queuekw['name'] = config.queue_name - queue = cmd_queue.Queue.create( - backend=config.backend, - **queuekw) + queue = cmd_queue.Queue.create(backend=config.backend, **queuekw) if config.virtualenv_cmd: # Experimental feature to automatically activate virtual # environments @@ -220,9 +270,12 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": if virtualenv_cmd == 
'auto': import os import shlex + venv_path = os.environ.get('VIRTUAL_ENV', '') if venv_path: - virtualenv_cmd = 'source ' + shlex.quote(str(ub.Path(venv_path) / 'bin/activate')) + virtualenv_cmd = 'source ' + shlex.quote( + str(ub.Path(venv_path) / 'bin/activate') + ) else: virtualenv_cmd = None if virtualenv_cmd: @@ -231,7 +284,7 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": def run_queue( config, - queue: "cmd_queue.Queue", + queue: 'cmd_queue.Queue', print_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> None: @@ -243,22 +296,27 @@ def run_queue( print_kwargs (None | Dict): """ import cmd_queue + queue: cmd_queue.Queue print_thresh = 30 if config['print_commands'] == 'auto': if len(queue) < print_thresh: config['print_commands'] = 1 else: - print(f'More than {print_thresh} jobs, skip queue.print_commands. ' - 'If you want to see them explicitly specify print_commands=1') + print( + f'More than {print_thresh} jobs, skip queue.print_commands. ' + 'If you want to see them explicitly specify print_commands=1' + ) config['print_commands'] = 0 if config['print_queue'] == 'auto': if len(queue) < print_thresh: config['print_queue'] = 1 else: - print(f'More than {print_thresh} jobs, skip queue.print_graph. ' - 'If you want to see them explicitly specify print_queue=1') + print( + f'More than {print_thresh} jobs, skip queue.print_graph. 
' + 'If you want to see them explicitly specify print_queue=1' + ) config['print_queue'] = 0 if config.print_commands: @@ -270,7 +328,9 @@ def run_queue( queue.print_graph(vertical_chains=True) if config.run: - queue.run(with_textual=config.with_textual, - other_session_handler=config.other_session_handler, - monitor=config.monitor, - **kwargs) + queue.run( + with_textual=config.with_textual, + other_session_handler=config.other_session_handler, + monitor=config.monitor, + **kwargs, + ) diff --git a/cmd_queue/main.py b/cmd_queue/main.py index 02ba7d8..b4c2672 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -66,19 +66,22 @@ def _testcase(): """ + if TYPE_CHECKING: import cmd_queue class CommonConfig(scfg.DataConfig): - qname = scfg.Value(None, position=1, help='name of the CLI queue') - dpath = scfg.Value('auto', help=ub.paragraph( - ''' + dpath = scfg.Value( + 'auto', + help=ub.paragraph( + """ The path the CLI will use to store intermediate files. Defaults to $XDG_CACHE/.cache/cmd_queue/cli - ''' - )) + """ + ), + ) verbose = scfg.Value(1, help='verbosity level') @@ -93,24 +96,38 @@ def main(cls, argv: int = 1, **kwargs: Any) -> None: rich.print('config = ' + ub.urepr(config, nl=1)) cli_queue_name = config['qname'] config.cli_queue_dpath = ub.Path(config['dpath']) - config.cli_queue_fpath = config.cli_queue_dpath / (str(cli_queue_name) + '.cmd_queue.json') + config.cli_queue_fpath = config.cli_queue_dpath / ( + str(cli_queue_name) + '.cmd_queue.json' + ) config.run() class CommonShowRun(CommonConfig): - workers = scfg.Value(1, help='number of concurrent queues for the tmux backend.') - - backend = scfg.Value('tmux', help='the execution backend to use', choices=['tmux', 'slurm', 'serial', 'airflow']) - - gpus = scfg.Value(None, help='a comma separated list of the gpu numbers to spread across. tmux backend only.') - - def _build_queue(config) -> "cmd_queue.Queue": + workers = scfg.Value( + 1, help='number of concurrent queues for the tmux backend.' 
+ ) + + backend = scfg.Value( + 'tmux', + help='the execution backend to use', + choices=['tmux', 'slurm', 'serial', 'airflow'], + ) + + gpus = scfg.Value( + None, + help='a comma separated list of the gpu numbers to spread across. tmux backend only.', + ) + + def _build_queue(config) -> 'cmd_queue.Queue': import cmd_queue import json - queue = cmd_queue.Queue.create(size=max(1, config['workers']), - backend=config['backend'], - name=config['qname'], - gpus=config['gpus']) + + queue = cmd_queue.Queue.create( + size=max(1, config['workers']), + backend=config['backend'], + name=config['qname'], + gpus=config['gpus'], + ) # Run a new CLI queue data = json.loads(config.cli_queue_fpath.read_text()) print('data = {}'.format(ub.urepr(data, nl=1))) @@ -128,17 +145,23 @@ def _build_queue(config) -> "cmd_queue.Queue": if len(bash_command) == 1: # hack import shlex + if shlex.quote(bash_command[0]) == bash_command[0]: bash_command = bash_command[0] else: bash_command = shlex.quote(bash_command[0]) else: import shlex - bash_command = ' '.join([shlex.quote(str(p)) for p in bash_command]) + + bash_command = ' '.join( + [shlex.quote(str(p)) for p in bash_command] + ) submitkw = ub.udict(row) & {'name', 'depends'} print('\n\n\n') print(f'submitkw={submitkw}') - print('bash_command = {}'.format(ub.urepr(bash_command, nl=1))) + print( + 'bash_command = {}'.format(ub.urepr(bash_command, nl=1)) + ) print('\n\n\n') queue.submit(bash_command, log=False, **submitkw) except Exception: @@ -256,11 +279,18 @@ class cleanup(CommonConfig): cleanup tmux sessions """ - yes = scfg.Value(False, isflag=True, help='if True say yes to prompts', short_alias=['y']) + yes = scfg.Value( + False, + isflag=True, + help='if True say yes to prompts', + short_alias=['y'], + ) __command__ = 'cleanup' + def run(config) -> None: from cmd_queue.util.util_tmux import tmux + sessions = tmux.list_sessions() print('sessions = {}'.format(ub.urepr(sessions, nl=1))) @@ -271,6 +301,7 @@ def run(config) -> None: 
sessions_ids.append(session['id']) print('sessions_ids = {}'.format(ub.urepr(sessions_ids, nl=1))) from rich import prompt + if config.yes or prompt.Confirm.ask('Do you want to kill these?'): for session_id in sessions_ids: tmux.kill_session(session_id) @@ -279,10 +310,11 @@ class run(CommonShowRun): """ run a queue """ + __command__ = 'run' + def run(config) -> None: - """ - """ + """ """ queue = config._build_queue() queue.run() @@ -297,32 +329,50 @@ class monitor(CommonConfig): the entry point used by the tmux monitor backend to host the status UI in its own session. """ + __command__ = 'monitor' - manifest = scfg.Value(None, help=ub.paragraph( - ''' + manifest = scfg.Value( + None, + help=ub.paragraph( + """ Optional explicit path to the monitor manifest JSON. If given, this overrides positional name resolution. - ''')) + """ + ), + ) - onfail = scfg.Value('', choices=['', 'kill'], help=ub.paragraph( - ''' + onfail = scfg.Value( + '', + choices=['', 'kill'], + help=ub.paragraph( + """ What to do if the queue ends with at least one failure. ``kill`` cancels still-running workers; ``''`` leaves them. - ''')) + """ + ), + ) - onexit = scfg.Value('', choices=['', 'capture'], help=ub.paragraph( - ''' + onexit = scfg.Value( + '', + choices=['', 'capture'], + help=ub.paragraph( + """ What to do once the queue is fully done. ``capture`` runs the backend's capture step (e.g. dump tmux pane contents). 
- ''')) + """ + ), + ) refresh_rate = scfg.Value(0.4, help='monitor refresh rate, seconds') - with_textual = scfg.Value('auto', help='use textual UI if available (tmux backend only)') + with_textual = scfg.Value( + 'auto', help='use textual UI if available (tmux backend only)' + ) def run(config) -> None: from cmd_queue import monitor_manifest as mm + if config.manifest: manifest_path = ub.Path(config.manifest).expand().absolute() if not manifest_path.exists(): @@ -336,7 +386,9 @@ def run(config) -> None: ) manifest_path = mm.resolve_manifest(target) if config.verbose: - rich.print(f'Loading monitor manifest from [bold]{manifest_path}[/bold]') + rich.print( + f'Loading monitor manifest from [bold]{manifest_path}[/bold]' + ) queue = mm.load_queue_for_monitoring(manifest_path) kwargs = {} try: @@ -358,6 +410,7 @@ class show(CommonShowRun): """ display a queue """ + __command__ = 'show' def run(config) -> None: @@ -369,20 +422,30 @@ class submit(CommonConfig): """ submit a job to a queue """ + __command__ = 'submit' - jobname = scfg.Value(None, help='for submit, this is the name of the new job') + jobname = scfg.Value( + None, help='for submit, this is the name of the new job' + ) depends = scfg.Value(None, help='comma separated jobnames to depend on') - command = scfg.Value(None, type=str, position=2, nargs='*', help=ub.paragraph( - ''' + command = scfg.Value( + None, + type=str, + position=2, + nargs='*', + help=ub.paragraph( + """ Specifies the bash command to queue. Care must be taken when specifying this argument. If specifying as a key/value pair argument, it is important to quote and escape the bash command properly. A more convenient way to specify this command is as a positional argument. End all of the options to this CLI with `--` and then specify your full command. 
- ''')) + """ + ), + ) def run(config) -> None: r""" @@ -431,6 +494,7 @@ def run(config) -> None: ub.cmd('cmd_queue test-queue') """ import json + # Run a new CLI queue data = json.loads(config.cli_queue_fpath.read_text()) row = {'type': 'command', 'command': config['command']} @@ -445,11 +509,16 @@ class new(CommonConfig): """ create a new queue """ + __command__ = 'new' - header = scfg.Value(None, help='a header command to execute in every session (e.g. activating a virtualenv). Only used when action is new') + header = scfg.Value( + None, + help='a header command to execute in every session (e.g. activating a virtualenv). Only used when action is new', + ) def run(config) -> None: import json + # Start a new CLI queue data = [] config = config @@ -464,9 +533,13 @@ class list(CommonConfig): """ display available queues """ + __command__ = 'list' + def run(config) -> None: - print(ub.urepr(list(config.cli_queue_dpath.glob('*.cmd_queue.json')))) + print( + ub.urepr(list(config.cli_queue_dpath.glob('*.cmd_queue.json'))) + ) main: Callable[..., Any] = CmdQueueCLI.main diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index c7d95d4..a7df0c0 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -27,7 +27,6 @@ class JobTable(Widget): # type: ignore - def __init__( self, table_fn: Optional[Callable[[], Tuple[Any, bool, Any]]] = None, @@ -87,9 +86,11 @@ def demo(CmdQueueMonitorApp) -> CmdQueueMonitorApp: >>> print(f'self.graceful_exit={self.graceful_exit}') """ countdown = 10 + def demo_table_fn(): nonlocal countdown import random + r = random.random() columns = ['name', 'status', 'passed', 'errors', 'total'] table = rich.table.Table() @@ -108,10 +109,11 @@ def demo_table_fn(): finished = countdown <= 0 agg_state = None return table, finished, agg_state + return CmdQueueMonitorApp(demo_table_fn) async def on_load(self, event: Any) -> None: - await self.bind("q", "quit", "Quit") + await self.bind('q', 'quit', 'Quit') async def 
action_quit(self) -> None: await self.shutdown() @@ -131,8 +133,8 @@ async def on_mount(self, event: Any) -> None: # vlayout.add(text) # vlayout.add(table_view) - await view.dock(header, edge="top") - await view.dock(footer, edge="bottom") + await view.dock(header, edge='top') + await view.dock(footer, edge='bottom') await view.dock(table_view) # await view.dock(scrollview2) diff --git a/cmd_queue/monitor_manifest.py b/cmd_queue/monitor_manifest.py index ebc004f..455790c 100644 --- a/cmd_queue/monitor_manifest.py +++ b/cmd_queue/monitor_manifest.py @@ -122,9 +122,11 @@ def load_queue_for_monitoring(manifest_path: Any) -> Any: backend = manifest['backend'] if backend == 'tmux': from cmd_queue import tmux_queue + return tmux_queue.TMUXMultiQueue._from_manifest(manifest) elif backend == 'slurm': from cmd_queue import slurm_queue + return slurm_queue.SlurmQueue._from_manifest(manifest) else: raise NotImplementedError( diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index 3d87b9d..25d4f16 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -86,6 +86,7 @@ class BashJob(base_queue.Job): >>> self = BashJob('echo hi', 'myjob') >>> self.print_commands(with_status=True, with_gaurds=True) """ + def __init__( self, command: str, @@ -103,7 +104,6 @@ def __init__( preamble: Optional[List[str]] = None, **kwargs: Any, ) -> None: - if depends is not None and not ub.iterable(depends): depends = [depends] # type: ignore self.name = name @@ -151,7 +151,6 @@ def finalize_text( conditionals: Optional[Dict[str, List[str]]] = None, **kwargs: Any, ) -> str: - # Note: with_gaurds are the +- e and +-x bash behaviors, it is not a # great name. with_status is used to dump extra metadata out. 
These add # a lot of bash boilerplate, which can make the script more difficult @@ -177,7 +176,7 @@ def finalize_text( 'on_skip': [ f'mkdir -p {self.skip_fpath.parent}', f'printf "skip" > {self.skip_fpath}', - ] + ], } # Append custom conditionals @@ -220,8 +219,9 @@ def finalize_text( json_fmt_parts += [ ('logs', '"%s"', self.log_fpath), ] - dump_pre_status = util_bash.bash_json_dump(json_fmt_parts, - self.stat_fpath) + dump_pre_status = util_bash.bash_json_dump( + json_fmt_parts, self.stat_fpath + ) script.append('# Mark job as running') script.append(dump_pre_status) @@ -240,13 +240,17 @@ def finalize_text( if self.cwd is not None: # If the directory doesn't exist, then the job is marked as failed. script.append('# Change to the specified directory') - script.append(f'{{ pushd "{self.cwd}" && CHDIR_OK=1; }} || CHDIR_OK=0') + script.append( + f'{{ pushd "{self.cwd}" && CHDIR_OK=1; }} || CHDIR_OK=0' + ) internal_conditionals.append('"$CHDIR_OK" == 1') if self.preamble: script.append('# Run preamble') preamble_str = ' && '.join(self.preamble) - script.append(f'{{ {preamble_str} && PREAMBLE_OK=1; }} || PREAMBLE_OK=0') + script.append( + f'{{ {preamble_str} && PREAMBLE_OK=1; }} || PREAMBLE_OK=0' + ) internal_conditionals.append('"$PREAMBLE_OK" == 1') if internal_conditionals: @@ -281,7 +285,9 @@ def finalize_text( # Tells bash to stop printing commands, but is clever in that it # captures the last return code and doesnt print this command. # Also set -e so our boilerplate is not allowed to fail - script.append('# Capture job return code, disable command echo, enable exit-on-error') + script.append( + '# Capture job return code, disable command echo, enable exit-on-error' + ) script.append('{ RETURN_CODE=$? ; set +x -e; } 2>/dev/null') # NOTE: ${PIPESTATUS[0]} is an alternative to $? 
if we want a # specific return code from a job chain @@ -326,23 +332,26 @@ def finalize_text( json_fmt_parts += [ ('logs', '"%s"', self.log_fpath), ] - dump_post_status = util_bash.bash_json_dump(json_fmt_parts, - self.stat_fpath) + dump_post_status = util_bash.bash_json_dump( + json_fmt_parts, self.stat_fpath + ) on_pass_part = indent(_job_conditionals['on_pass']) on_fail_part = indent(_job_conditionals['on_fail']) # RETURN_CODE=126 means dependencies were unmet; on_skip # already ran in the deps-failed branch above, so we don't # want to also mark the job as failed here. - conditional_body = '\n'.join([ - 'if [[ "$RETURN_CODE" == "0" ]]; then', - on_pass_part, - 'elif [[ "$RETURN_CODE" == "126" ]]; then', - ' : # job was skipped; on_skip already handled', - 'else', - on_fail_part, - 'fi' - ]) + conditional_body = '\n'.join( + [ + 'if [[ "$RETURN_CODE" == "0" ]]; then', + on_pass_part, + 'elif [[ "$RETURN_CODE" == "126" ]]; then', + ' : # job was skipped; on_skip already handled', + 'else', + on_fail_part, + 'fi', + ] + ) script.append('# Mark job as stopped') script.append(dump_post_status) script.append(conditional_body) @@ -398,11 +407,13 @@ def print_commands( """ style = base_queue.Queue._coerce_style(self, style, with_rich) - code = self.finalize_text(with_status=with_status, - with_gaurds=with_gaurds, **kwargs) + code = self.finalize_text( + with_status=with_status, with_gaurds=with_gaurds, **kwargs + ) if style == 'rich': from rich.syntax import Syntax from rich.console import Console + console = Console() console.print(Syntax(code, 'bash')) elif style == 'colors': @@ -489,7 +500,11 @@ def __init__( """ super().__init__() if rootid is None: - rootid = str(ub.timestamp().split('T')[0]) + '_' + ub.hash_data(uuid.uuid4())[0:8] + rootid = ( + str(ub.timestamp().split('T')[0]) + + '_' + + ub.hash_data(uuid.uuid4())[0:8] + ) self.name = name self.rootid = rootid if dpath is None: @@ -499,7 +514,9 @@ def __init__( self.unused_kwargs = kwargs self.fpath = 
self.dpath / (self.pathid + '.sh') - self.state_fpath = self.dpath / 'serial_queue_{}.txt'.format(self.pathid) + self.state_fpath = self.dpath / 'serial_queue_{}.txt'.format( + self.pathid + ) self.environ = environ self.header = '#!/bin/bash' # todo: handle different shells @@ -514,7 +531,7 @@ def __init__( @property def pathid(self) -> str: - """ A path-safe identifier for file names """ + """A path-safe identifier for file names""" return '{}_{}'.format(self.name, self.rootid) def __nice__(self) -> str: @@ -535,9 +552,11 @@ def order_jobs(self) -> None: """ # We need to ensure the jobs are in a topologoical order here. import networkx as nx + graph = self._dependency_graph() original_order = [j.name for j in self.jobs] from cmd_queue.util import util_networkx + if not util_networkx.is_topological_order(graph, original_order): # If not already topologically sorted, try to make the minimal # reordering to achieve it. @@ -565,6 +584,7 @@ def finalize_text( """ import cmd_queue + self.order_jobs() script = [self.header] script += ['# Written by cmd_queue {}'.format(cmd_queue.__version__)] @@ -575,15 +595,18 @@ def finalize_text( script.append('set -e') if with_status: - script.append(ub.codeblock( - f''' + script.append( + ub.codeblock( + f""" # Init state to keep track of job progress (( "_CMD_QUEUE_NUM_FAILED=0" )) || true (( "_CMD_QUEUE_NUM_PASSED=0" )) || true (( "_CMD_QUEUE_NUM_SKIPPED=0" )) || true _CMD_QUEUE_TOTAL={total} _CMD_QUEUE_STATUS="" - ''')) + """ + ) + ) old_status = None @@ -592,10 +615,13 @@ def _mark_status(status): # be careful with json formatting here if with_status: if old_status != status: - script.append(ub.codeblock( - ''' + script.append( + ub.codeblock( + """ _CMD_QUEUE_STATUS="{}" - ''').format(status)) + """ + ).format(status) + ) old_status = status @@ -609,8 +635,9 @@ def _mark_status(status): ('name', '"%s"', self.name), ('rootid', '"%s"', self.rootid), ] - dump_code = util_bash.bash_json_dump(json_fmt_parts, - self.state_fpath) + 
dump_code = util_bash.bash_json_dump( + json_fmt_parts, self.state_fpath + ) script.append('# Update queue status') script.append(dump_code) # script.append('cat ' + str(self.state_fpath)) @@ -634,8 +661,9 @@ def _command_exit(): _mark_status('set_environ') if with_gaurds: _command_enter() - script.extend([ - f'export {k}="{v}"' for k, v in self.environ.items()]) + script.extend( + [f'export {k}="{v}"' for k, v in self.environ.items()] + ) if with_gaurds: _command_exit() @@ -668,7 +696,9 @@ def _command_exit(): if job.bookkeeper: if with_locks: - script.append(job.finalize_text(with_status, with_gaurds)) + script.append( + job.finalize_text(with_status, with_gaurds) + ) else: if with_status: script.append('') @@ -677,18 +707,25 @@ def _command_exit(): _mark_status('run') - script.append(ub.codeblock( - ''' + script.append( + ub.codeblock( + """ # ### Command {} / {} - {} - ''').format(num + 1, total, job.name)) + """ + ).format(num + 1, total, job.name) + ) conditionals = { 'on_pass': '(( "_CMD_QUEUE_NUM_PASSED=_CMD_QUEUE_NUM_PASSED+1" )) || true', 'on_fail': '(( "_CMD_QUEUE_NUM_FAILED=_CMD_QUEUE_NUM_FAILED+1" )) || true', 'on_skip': '(( "_CMD_QUEUE_NUM_SKIPPED=_CMD_QUEUE_NUM_SKIPPED+1" )) || true', } - script.append(job.finalize_text(with_status, with_gaurds, conditionals)) + script.append( + job.finalize_text( + with_status, with_gaurds, conditionals + ) + ) if with_status: script.append('# ') script.append('#') @@ -793,18 +830,40 @@ def run( # TODO: can implement a monitor here for non-blocking mode detach = not block if mode == 'bash': - ub.cmd(f'bash {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'bash {self.fpath}', + verbose=verbose, + check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) elif mode == 'source': - ub.cmd(f'source {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'source 
{self.fpath}', + verbose=verbose, + check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) else: - ub.cmd(f'{mode} {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'{mode} {self.fpath}', + verbose=verbose, + check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) # raise KeyError def job_details(self) -> None: import json + for job in self.jobs: print('+--------') print(f'job={job}') @@ -817,6 +876,7 @@ def job_details(self) -> None: def read_state(self) -> Dict[str, Any]: import json import time + max_attempts = 100 num_attempts = 0 while True: @@ -869,6 +929,7 @@ def indent(text: Any, prefix: str = ' ') -> str: def _check_bash_text_for_syntax_errors(bash_text: str) -> None: import tempfile + tmpdir = tempfile.TemporaryDirectory() with tmpdir: dpath = ub.Path(tmpdir.name) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index faedfce..76e0621 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -1,4 +1,5 @@ from __future__ import annotations + r""" Work in progress. 
The idea is to provide a TMUX queue and a SLURM queue that provide a common high level API, even though functionality might diverge, the @@ -55,6 +56,7 @@ @cache def _unit_registery() -> Any: import sys + if sys.version_info[0:2] == (3, 9): # backwards compatibility support for numpy 2.0 and pint on cp39 try: @@ -65,6 +67,7 @@ def _unit_registery() -> Any: if not np.__version__.startswith('1.'): np.cumproduct = np.cumprod import pint + reg = pint.UnitRegistry() return reg @@ -232,6 +235,7 @@ class SlurmJob(base_queue.Job): >>> command = self._build_command() >>> print(command) """ + def __init__( self, command: str, @@ -250,6 +254,7 @@ def __init__( super().__init__() if name is None: import uuid + name = 'job-' + str(uuid.uuid4()) if depends is not None and not ub.iterable(depends): depends = [depends] # type: ignore @@ -282,8 +287,10 @@ def _build_command( jobname_to_varname: Optional[Dict[str, str]] = None, global_preamble: Optional[List[str]] = None, ) -> str: - args = self._build_sbatch_args(jobname_to_varname=jobname_to_varname, - global_preamble=global_preamble) + args = self._build_sbatch_args( + jobname_to_varname=jobname_to_varname, + global_preamble=global_preamble, + ) return ' \\\n '.join(args) def _build_sbatch_args( @@ -301,15 +308,19 @@ def _build_sbatch_args( sbatch_args.append(f'--mem={mem}') if self.gpus and 'gres' not in self._sbatch_kvargs: ub.schedule_deprecation( - 'cmd_queue', name='gres', type='argument', + 'cmd_queue', + name='gres', + type='argument', migration=ub.paragraph( - ''' + """ the handling of gres here is broken and will be changed in the future. For now specify gres explicitly in slurm_options or the kwargs for the queue. - '''), - deprecate='now' + """ + ), + deprecate='now', ) + # NOTE: the handling of gres here is broken and will be changed in # the future. 
For now specify gres explicitly in slurm_options def _coerce_gres(gpus): @@ -322,6 +333,7 @@ def _coerce_gres(gpus): else: raise TypeError(type(self.gpus)) return gres + gres = _coerce_gres(self.gpus) sbatch_args.append(f'--gres="{gres}"') if self.output_fpath: @@ -342,13 +354,18 @@ def _coerce_gres(gpus): type_to_dependencies = { 'afterok': [], } - depends = self.depends if ub.iterable(self.depends) else [self.depends] + depends = ( + self.depends if ub.iterable(self.depends) else [self.depends] + ) for item in depends: if isinstance(item, SlurmJob): jobid = item.jobid if jobid is None and item.name: - if jobname_to_varname and item.name in jobname_to_varname: + if ( + jobname_to_varname + and item.name in jobname_to_varname + ): jobid = '${%s}' % jobname_to_varname[item.name] else: jobid = f"$(squeue --noheader --format %i --name '{item.name}')" @@ -381,6 +398,7 @@ def _coerce_gres(gpus): sbatch_args.append(f'"--begin={self.begin}"') import shlex + _preamble = [] if global_preamble: _preamble.extend(global_preamble) @@ -448,6 +466,7 @@ class SlurmQueue(base_queue.Queue): >>> job5 = self.submit('echo "$FOO"') >>> self.print_commands() """ + def __init__( self, name: Optional[str] = None, @@ -458,13 +477,16 @@ def __init__( super().__init__() import uuid import time + self.jobs = [] if name is None: name = 'SQ' self.name = name stamp = time.strftime('%Y%m%dT%H%M%S') self.unused_kwargs = kwargs - self.queue_id = name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] + self.queue_id = ( + name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] + ) self.dpath = ub.Path.appdir('cmd_queue/slurm') / self.queue_id if 0: # hack for submission on different systems, probably dont want to @@ -495,6 +517,7 @@ def _slurm_checks() -> None: status['has_squeue'] = bool(info['squeue_fpath']) status['slurmd_running'] = False import psutil + for p in psutil.process_iter(): if p.name() == 'slurmd': status['slurmd_running'] = True @@ -505,19 +528,23 @@ def _slurm_checks() -> 
None: 'create_time': p.create_time(), } break - status['squeue_working'] = (ub.cmd('squeue')['ret'] == 0) + status['squeue_working'] = ub.cmd('squeue')['ret'] == 0 sinfo = ub.cmd('sinfo --json') status['sinfo_working'] = False if sinfo['ret'] == 0: import json + status['sinfo_working'] = True - status['sinfo_version_str'] = ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + status['sinfo_version_str'] = ( + ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + ) sinfo_out = json.loads(sinfo['out']) nodes = sinfo_out['nodes'] node_states = [node['state'] for node in nodes] has_working_nodes = not all( - 'down' in str(state).lower() for state in node_states) + 'down' in str(state).lower() for state in node_states + ) status['has_working_nodes'] = has_working_nodes @staticmethod @@ -527,20 +554,26 @@ def is_available() -> bool: """ if ub.find_exe('squeue'): import psutil - slurmd_running = any(p.name() == 'slurmd' for p in psutil.process_iter()) + + slurmd_running = any( + p.name() == 'slurmd' for p in psutil.process_iter() + ) if slurmd_running: - squeue_working = (ub.cmd('squeue')['ret'] == 0) + squeue_working = ub.cmd('squeue')['ret'] == 0 if squeue_working: # Check if nodes are available or down # note: the --json command is not available in # slurm-wlm 19.05.5, but it is in slurm-wlm 21.08.5 - sinfo_version_str = ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + sinfo_version_str = ( + ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + ) sinfo_major_version = int(sinfo_version_str.split('.')[0]) if sinfo_major_version < 21: # Dont check in this case return True else: import json + # sinfo --json changed between v22 and v23 # https://github.com/SchedMD/slurm/blob/slurm-23.02/RELEASE_NOTES#L230 if sinfo_major_version >= 21: @@ -562,10 +595,13 @@ def is_available() -> bool: node_states = [node['state'] for node in nodes] if sinfo_major_version > 21: has_working_nodes = not all( - 'down' in str(state).lower() for state in node_states) + 'down' 
in str(state).lower() + for state in node_states + ) else: has_working_nodes = not all( - 'DOWN' in state for state in node_states) + 'DOWN' in state for state in node_states + ) if has_working_nodes: return True @@ -618,7 +654,8 @@ def submit( depends = [depends] depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] _kwargs = self._sbatch_kvargs | kwargs job = SlurmJob(command, depends=depends, preamble=preamble, **_kwargs) @@ -635,6 +672,7 @@ def order_jobs(self) -> List[SlurmJob]: List[SlurmJob]: ordered jobs """ import networkx as nx + graph = self._dependency_graph() if 0: print(nx.forest_str(nx.minimum_spanning_arborescence(graph))) @@ -644,7 +682,9 @@ def order_jobs(self) -> List[SlurmJob]: new_order.append(job) return new_order - def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> str: + def finalize_text( + self, exclude_tags: Optional[Any] = None, **kwargs: Any + ) -> str: """ Serialize the state of the queue into a bash script. @@ -667,7 +707,8 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st # args = job._build_sbatch_args(jobname_to_varname) # command = ' '.join(args) command = job._build_command( - jobname_to_varname, global_preamble=global_preamble) + jobname_to_varname, global_preamble=global_preamble + ) if 1: varname = 'JOB_{:03d}'.format(len(jobname_to_varname)) command = f'{varname}=$({command} --parsable)' @@ -680,6 +721,7 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st # Build a command to dump the job-ids for this queue to disk to # allow us to track them in the monitor. 
from cmd_queue.util import util_bash + json_fmt_parts = [ (job_varname, '%s', '$' + job_varname) for job_varname in self.jobname_to_varname.values() @@ -725,6 +767,7 @@ def run( return self.monitor(onfail=onfail, onexit=onexit) if monitor == 'none': from rich import print as rich_print + rich_print( '[bold]Queue running detached.[/bold] ' f'Reattach with: cmd_queue monitor --manifest={manifest_path}' @@ -733,12 +776,15 @@ def run( if monitor == 'tmux': if not ub.find_exe('tmux'): import warnings + warnings.warn( "monitor='tmux' requested but tmux not found; " - "falling back to inline monitor.") + 'falling back to inline monitor.' + ) return self.monitor(onfail=onfail, onexit=onexit) from cmd_queue.tmux_queue import has_stdin from cmd_queue.util.util_tmux import tmux as _tmux + extra_args = [] if onfail: extra_args.append(f'--onfail={onfail}') @@ -746,6 +792,7 @@ def run( extra_args.append(f'--onexit={onexit}') session_name = f'cmdq-monitor-{self.queue_id}' from rich import print as rich_print + rich_print( f'[bold]Launching monitor in tmux session[/bold] {session_name}' ) @@ -829,16 +876,19 @@ def monitor( from rich.table import Table import io import pandas as pd + jobid_history = set() num_at_start = None job_status_table = None if self.jobid_fpath is not None: - class UnableToMonitor(Exception): - ... + + class UnableToMonitor(Exception): ... 
+ try: import json + if not self.jobid_fpath.exists(): raise UnableToMonitor jobid_lut = json.loads(self.jobid_fpath.read_text()) @@ -856,6 +906,7 @@ class UnableToMonitor(Exception): def update_jobid_status(): import rich + assert job_status_table is not None for row in job_status_table: if row['needs_update']: @@ -868,7 +919,7 @@ def update_jobid_status(): if info['JobState'].startswith('FAILED'): row['status'] = 'failed' rich.print(f'[red] Failed job: {info["JobName"]}') - if info["StdErr"] == info["StdOut"]: + if info['StdErr'] == info['StdOut']: rich.print(f'[red] * Logs: {info["StdErr"]}') else: rich.print(f'[red] StdErr: {info["StdErr"]}') @@ -918,7 +969,9 @@ def update_status_table(): # kills jobs too fast and not when they are in a dependency state not a # a never satisfied state. Killing these jobs here seems to fix # it. - broken_jobs = df[df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)'] + broken_jobs = df[ + df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)' + ] if len(broken_jobs): for name in broken_jobs['NAME']: ub.cmd(f'scancel --name="{name}"') @@ -928,7 +981,9 @@ def update_status_table(): if job_status_table is not None: update_jobid_status() - state = ub.dict_hist([row['status'] for row in job_status_table]) + state = ub.dict_hist( + [row['status'] for row in job_status_table] + ) state.setdefault('passed', 0) state.setdefault('failed', 0) state.setdefault('skipped', 0) @@ -938,34 +993,52 @@ def update_status_table(): state['total'] = len(job_status_table) state['other'] = state['total'] - ( - state['passed'] + state['failed'] + state['skipped'] + - state['running'] + state['pending'] + state['passed'] + + state['failed'] + + state['skipped'] + + state['running'] + + state['pending'] ) pass_color = '' fail_color = '' skip_color = '' - finished = (state['pending'] + state['unknown'] + state['running'] == 0) - if (state['failed'] > 0): + finished = ( + state['pending'] + state['unknown'] + state['running'] == 0 + ) + if state['failed'] 
> 0: fail_color = '[red]' - if (state['skipped'] > 0): + if state['skipped'] > 0: skip_color = '[yellow]' if finished: pass_color = '[green]' - header = ['passed', 'failed', 'skipped', 'running', 'pending', 'other', 'total'] + header = [ + 'passed', + 'failed', + 'skipped', + 'running', + 'pending', + 'other', + 'total', + ] row_values = [ - f"{pass_color}{state['passed']}", - f"{fail_color}{state['failed']}", - f"{skip_color}{state['skipped']}", - f"{state['running']}", - f"{state['pending']}", - f"{state['other']}", - f"{state['total']}", + f'{pass_color}{state["passed"]}', + f'{fail_color}{state["failed"]}', + f'{skip_color}{state["skipped"]}', + f'{state["running"]}', + f'{state["pending"]}', + f'{state["other"]}', + f'{state["total"]}', ] else: # TODO: determine if slurm has accounting on, and if we can # figure out how many jobs errored / passed - header = ['num_running', 'num_in_queue', 'total_monitored', 'num_at_start'] + header = [ + 'num_running', + 'num_in_queue', + 'total_monitored', + 'num_at_start', + ] row_values = [ f'{num_running}', f'{num_in_queue}', @@ -975,10 +1048,9 @@ def update_status_table(): # row_values.append(str(state.get('FAIL', 0))) # row_values.append(str(state.get('SKIPPED', 0))) # row_values.append(str(state.get('PENDING', 0))) - finished = (num_in_queue == 0) + finished = num_in_queue == 0 - table = Table(*header, - title='slurm-monitor') + table = Table(*header, title='slurm-monitor') table.add_row(*row_values) @@ -1005,6 +1077,7 @@ def _update_agg_state() -> None: _update_agg_state() except KeyboardInterrupt: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() @@ -1043,6 +1116,7 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: def _write_monitor_manifest(self) -> Any: """Persist the monitor manifest to ``/monitor_manifest.json``.""" from cmd_queue import monitor_manifest as mm + path = mm.manifest_path_for_dpath(self.dpath) manifest = self._build_monitor_manifest() 
mm.write_manifest(manifest, path) @@ -1054,7 +1128,7 @@ def _write_monitor_manifest(self) -> Any: return path @classmethod - def _from_manifest(cls, manifest: Dict[str, Any]) -> "SlurmQueue": + def _from_manifest(cls, manifest: Dict[str, Any]) -> 'SlurmQueue': """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" self = cls.__new__(cls) base_queue.Queue.__init__(self) @@ -1150,11 +1224,19 @@ def parse_scontrol_output(output: str) -> dict: parse_scontrol_output(output) """ import re + # These keys should be the last key on a line. They are allowed to contain # space and equal characters. special_keys = [ - 'JobName', 'WorkDir', 'StdErr', 'StdIn', 'StdOut', 'Command', - 'NodeList', 'BatchHost', 'Partition' + 'JobName', + 'WorkDir', + 'StdErr', + 'StdIn', + 'StdOut', + 'Command', + 'NodeList', + 'BatchHost', + 'Partition', ] patterns = '(' + '|'.join(f' {re.escape(k)}=' for k in special_keys) + ')' pat = re.compile(patterns) @@ -1170,7 +1252,7 @@ def parse_scontrol_output(output: str) -> dict: # Special case: Key is a special key with a space startpos = match.start() leading_part = line[:startpos] - special_part = line[startpos + 1:] + special_part = line[startpos + 1 :] key, value = special_part.split('=', 1) parsed_data[key] = value.strip() line = leading_part diff --git a/cmd_queue/slurmify.py b/cmd_queue/slurmify.py index 7947d3d..e2c68d4 100644 --- a/cmd_queue/slurmify.py +++ b/cmd_queue/slurmify.py @@ -15,6 +15,7 @@ -- \ python -c 'import sys; print("hello world"); sys.exit(0)' """ + #!/usr/bin/env python3 import scriptconfig as scfg import ubelt as ub @@ -23,21 +24,35 @@ class SlurmifyCLI(scfg.DataConfig): __command__ = 'slurmify' - jobname = scfg.Value(None, help='for submit, this is the name of the new job') + jobname = scfg.Value( + None, help='for submit, this is the name of the new job' + ) depends = scfg.Value(None, help='comma separated jobnames to depend on') - command = scfg.Value(None, type=str, position=1, nargs='*', 
help=ub.paragraph( - ''' + command = scfg.Value( + None, + type=str, + position=1, + nargs='*', + help=ub.paragraph( + """ Specifies the bash command to queue. Care must be taken when specifying this argument. If specifying as a key/value pair argument, it is important to quote and escape the bash command properly. A more convenient way to specify this command is as a positional argument. End all of the options to this CLI with `--` and then specify your full command. - ''')) + """ + ), + ) - gpus = scfg.Value(None, help='a comma separated list of the gpu numbers to spread across. tmux backend only.') - workers = scfg.Value(1, help='number of concurrent queues for the tmux backend.') + gpus = scfg.Value( + None, + help='a comma separated list of the gpu numbers to spread across. tmux backend only.', + ) + workers = scfg.Value( + 1, help='number of concurrent queues for the tmux backend.' + ) mem = scfg.Value(None, help='') partition = scfg.Value(1, help='slurm partition') @@ -59,6 +74,7 @@ def main(cls, argv=1, **kwargs): """ import rich from rich.markup import escape + config = cls.cli(argv=argv, data=kwargs, strict=True) rich.print('config = ' + escape(ub.urepr(config, nl=1))) @@ -71,6 +87,7 @@ def main(cls, argv=1, **kwargs): row['depends'] = config.depends import cmd_queue + queue = cmd_queue.Queue.create( size=max(1, config['workers']), backend='slurm', @@ -87,13 +104,17 @@ def main(cls, argv=1, **kwargs): if len(bash_command) == 1: # hack import shlex + if shlex.quote(bash_command[0]) == bash_command[0]: bash_command = bash_command[0] else: bash_command = shlex.quote(bash_command[0]) else: import shlex - bash_command = ' '.join([shlex.quote(str(p)) for p in bash_command]) + + bash_command = ' '.join( + [shlex.quote(str(p)) for p in bash_command] + ) submitkw = ub.udict(row) & {'name', 'depends'} queue.submit(bash_command, log=False, **submitkw) except Exception: @@ -104,6 +125,7 @@ def main(cls, argv=1, **kwargs): # 
config.cli_queue_fpath.write_text(json.dumps(row)) # 'sbatch --job-name="test_job1" --output="$HOME/.cache/slurm/logs/job-%j-%x.out" --wrap="" + __cli__ = SlurmifyCLI if __name__ == '__main__': diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index fedd89d..b6b4a1d 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -199,6 +199,7 @@ class TMUXMultiQueue(base_queue.Queue): >>> if self.is_available(): >>> self.run(with_textual=False, check_other_sessions=0) """ + def __init__( self, size: int = 1, @@ -213,7 +214,11 @@ def __init__( super().__init__() if rootid is None: - rootid = str(ub.timestamp().split('T')[0]) + '_' + ub.hash_data(uuid.uuid4())[0:8] + rootid = ( + str(ub.timestamp().split('T')[0]) + + '_' + + ub.hash_data(uuid.uuid4())[0:8] + ) if name is None: name = 'unnamed' self.name = name @@ -229,7 +234,9 @@ def __init__( # Note: size can be changed as long as it happens before the queue is # written and run. if size <= 0: - raise ValueError(f'tmux queue size must be positive got size={size}') + raise ValueError( + f'tmux queue size must be positive got size={size}' + ) self.size = size self.environ = environ self.fpath = self.dpath / f'run_queues_{self.name}.sh' @@ -261,22 +268,28 @@ def is_available(cls) -> bool: def _new_workers(self, start: int = 0) -> List[serial_queue.SerialQueue]: import itertools as it + per_worker_environs = [self.environ] * self.size if self.gpus: # TODO: more sophisticated GPU policy? 
per_worker_environs = [ - ub.dict_union(e, { - 'CUDA_VISIBLE_DEVICES': str(cvd), - }) + ub.dict_union( + e, + { + 'CUDA_VISIBLE_DEVICES': str(cvd), + }, + ) for cvd, e in zip(it.cycle(self.gpus), per_worker_environs) ] workers = [ serial_queue.SerialQueue( - name='{}{}_{:03d}'.format(self._tmux_session_prefix, self.name, worker_idx), + name='{}{}_{:03d}'.format( + self._tmux_session_prefix, self.name, worker_idx + ), rootid=self.rootid, dpath=self.dpath, - environ=e + environ=e, ) for worker_idx, e in enumerate(per_worker_environs, start=start) ] @@ -285,7 +298,9 @@ def _new_workers(self, start: int = 0) -> List[serial_queue.SerialQueue]: def __nice__(self) -> str: return ub.urepr(self.jobs) - def _semaphore_wait_command(self, flag_fpaths: Iterable[str], msg: str) -> str: + def _semaphore_wait_command( + self, flag_fpaths: Iterable[str], msg: str + ) -> str: r""" TODO: use flock? or inotify? @@ -326,15 +341,16 @@ def _semaphore_wait_command(self, flag_fpaths: Iterable[str], msg: str) -> str: sleep 1; done printf "finished {msg} " - ''') + ''' + ) return command def _semaphore_signal_command(self, flag_fpath): return ub.codeblock( - f''' + f""" # Signal this worker is complete mkdir -p {flag_fpath.parent} && touch {flag_fpath} - ''' + """ ) def order_jobs(self) -> None: @@ -425,6 +441,7 @@ def order_jobs(self) -> None: >>> self.print_commands() """ import networkx as nx + graph = self._dependency_graph() # Get rid of implicit dependencies @@ -442,7 +459,7 @@ def order_jobs(self) -> None: print(nx.is_directed_acyclic_graph(graph)) simple_cycles = list(nx.cycles.simple_cycles(graph)) print('simple_cycles = {}'.format(ub.urepr(simple_cycles, nl=1))) - nx.write_network_text(graph, print, end="") + nx.write_network_text(graph, print, end='') raise in_cut_nodes = set() @@ -465,7 +482,9 @@ def order_jobs(self) -> None: cut_graph.remove_edges_from(cut_edges) # Get all the node groups disconnected by the cuts - condensed = nx.condensation(reduced_graph, 
nx.weakly_connected_components(cut_graph)) + condensed = nx.condensation( + reduced_graph, nx.weakly_connected_components(cut_graph) + ) # TODO: can we use nx.topological_generations for a more elegant # solution here? @@ -476,17 +495,24 @@ def order_jobs(self) -> None: condensed_order = list(nx.topological_sort(condensed)) for c_node in condensed_order: members = set(condensed.nodes[c_node]['members']) - ancestors = set(ub.flatten([nx.ancestors(reduced_graph, m) for m in members])) + ancestors = set( + ub.flatten([nx.ancestors(reduced_graph, m) for m in members]) + ) cut_in_ancestors = ancestors & in_cut_nodes cut_out_ancestors = ancestors & out_cut_nodes cut_in_members = members & in_cut_nodes - rank = len(cut_in_members) + len(cut_out_ancestors) + len(cut_in_ancestors) + rank = ( + len(cut_in_members) + + len(cut_out_ancestors) + + len(cut_in_ancestors) + ) for m in members: rankings[rank].update(members) if 0: from graphid.util import util_graphviz import kwplot + kwplot.autompl() util_graphviz.show_nx(graph, fnum=1) util_graphviz.show_nx(reduced_graph, fnum=3) @@ -507,10 +533,15 @@ def order_jobs(self) -> None: # Ranked bins # Solve a bin packing problem to partition these into self.size groups from cmd_queue.util.util_algo import balanced_number_partitioning + # Weighting by job heaviness would help here. 
group_weights = list(map(len, parallel_groups)) - groupxs = balanced_number_partitioning(group_weights, num_parts=self.size) - rank_groups = [list(ub.take(parallel_groups, gxs)) for gxs in groupxs] + groupxs = balanced_number_partitioning( + group_weights, num_parts=self.size + ) + rank_groups = [ + list(ub.take(parallel_groups, gxs)) for gxs in groupxs + ] rank_groups = [g for g in rank_groups if len(g)] # Reorder each group to better agree with submission order @@ -520,8 +551,12 @@ def order_jobs(self) -> None: for nodes in group: nodes_index = min(graph.nodes[n]['index'] for n in nodes) priorities.append(nodes_index) - final_queue_order = list(ub.flatten(ub.take(group, ub.argsort(priorities)))) - final_queue_jobs = [graph.nodes[n]['job'] for n in final_queue_order] + final_queue_order = list( + ub.flatten(ub.take(group, ub.argsort(priorities))) + ) + final_queue_jobs = [ + graph.nodes[n]['job'] for n in final_queue_order + ] rank_jobs.append(final_queue_jobs) ranked_job_groups.append(rank_jobs) @@ -534,7 +569,7 @@ def order_jobs(self) -> None: ranked_job_groups = [[serial_groups]] queue_workers = [] - flag_dpath = (self.dpath / 'semaphores') + flag_dpath = self.dpath / 'semaphores' prev_rank_flag_fpaths = None for rank, rank_jobs in enumerate(ranked_job_groups): # Hack, abuse init workers each time to construct workers @@ -544,7 +579,10 @@ def order_jobs(self) -> None: # Add a dummy job to wait for dependencies of this linear queue if prev_rank_flag_fpaths: - command = self._semaphore_wait_command(prev_rank_flag_fpaths, msg=f"wait for previous rank {rank - 1}") + command = self._semaphore_wait_command( + prev_rank_flag_fpaths, + msg=f'wait for previous rank {rank - 1}', + ) # Note: this should not be a real job worker.submit(command, bookkeeper=1) @@ -560,7 +598,10 @@ def order_jobs(self) -> None: rank_flag_fpaths = [] num_rank_workers = len(rank_workers) for worker_idx, worker in enumerate(rank_workers): - rank_flag_fpath = flag_dpath / 
f'rank_flag_{rank}_{worker_idx}_{num_rank_workers}.done' + rank_flag_fpath = ( + flag_dpath + / f'rank_flag_{rank}_{worker_idx}_{num_rank_workers}.done' + ) command = self._semaphore_signal_command(rank_flag_fpath) # Note: this should not be a real job worker.submit(command, bookkeeper=1) @@ -576,26 +617,32 @@ def order_jobs(self) -> None: def finalize_text(self, **kwargs: Any) -> str: self.order_jobs() # Create a driver script - driver_lines = [ub.codeblock( - f''' + driver_lines = [ + ub.codeblock( + f""" #!/bin/bash # Driver script to start the tmux-queue echo "Submitting {self.num_real_jobs} jobs to a tmux queue" - ''')] + """ + ) + ] for queue in self.workers: # run_command_in_tmux_queue(command, name) # TODO: figure out how to forward environment variables from the # running sessions. We dont want to log secrets to plaintext. part = ub.codeblock( - f''' + f""" ### Run Queue: {queue.pathid} with {len(queue)} jobs tmux new-session -d -s {queue.pathid} "bash" tmux send -t {queue.pathid} \\ "source {queue.fpath}" \\ Enter - ''').format() + """ + ).format() driver_lines.append(part) - driver_lines += [f'echo "Spread jobs across {len(self.workers)} tmux workers"'] + driver_lines += [ + f'echo "Spread jobs across {len(self.workers)} tmux workers"' + ] driver_text = '\n\n'.join(driver_lines) return driver_text @@ -611,7 +658,10 @@ def kill_other_queues(self, ask_first: bool = True) -> None: cmd_queue and kill them. 
""" import parse - queue_name_pattern = parse.Parser(self._tmux_session_prefix + '{name}_{rootid}') + + queue_name_pattern = parse.Parser( + self._tmux_session_prefix + '{name}_{rootid}' + ) current_sessions = self._tmux_current_sessions() other_session_ids = [] for info in current_sessions: @@ -621,7 +671,9 @@ def kill_other_queues(self, ask_first: bool = True) -> None: other_session_ids.append(info['id']) # print(f'other_session_ids={other_session_ids}') if other_session_ids: - print(f'Detected {len(other_session_ids)} other running cmd-queue sessions with the same name') + print( + f'Detected {len(other_session_ids)} other running cmd-queue sessions with the same name' + ) print('Commands to kill them:') kill_commands = [] for sess_id in other_session_ids: @@ -629,13 +681,17 @@ def kill_other_queues(self, ask_first: bool = True) -> None: print(command2) kill_commands.append(command2) from rich import prompt - if not ask_first or prompt.Confirm().ask('Do you want to kill the other sessions?'): + + if not ask_first or prompt.Confirm().ask( + 'Do you want to kill the other sessions?' 
+ ): for command in kill_commands: ub.cmd(command, verbose=self.cmd_verbose) def handle_other_sessions(self, other_session_handler: str) -> None: if other_session_handler == 'auto': from cmd_queue.tmux_queue import has_stdin + if has_stdin(): other_session_handler = 'ask' else: @@ -694,7 +750,8 @@ def run( if check_other_sessions: ub.schedule_deprecation( - 'tmux_queue', 'check_other_sessions', 'argument') + 'tmux_queue', 'check_other_sessions', 'argument' + ) if check_other_sessions == 'auto': if not has_stdin(): check_other_sessions = False @@ -703,8 +760,12 @@ def run( self.write() manifest_path = self._write_monitor_manifest() - ub.cmd(f'bash {self.fpath}', verbose=self.cmd_verbose, check=True, - system=system) + ub.cmd( + f'bash {self.fpath}', + verbose=self.cmd_verbose, + check=True, + system=system, + ) if not block: return None return self._dispatch_monitor( @@ -717,6 +778,7 @@ def run( def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: from rich import print as rich_print + failed = agg_state.get('failed', 0) passed = agg_state.get('passed', 0) skipped = agg_state.get('skipped', 0) @@ -740,11 +802,12 @@ def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: any_log_missing = False for job in failed_jobs: log_fpath = getattr(job, 'log_fpath', None) - if (getattr(job, 'log', False) and log_fpath is not None - and log_fpath.exists()): - rich_print( - f' [red]{job.name}[/red] log: {log_fpath}' - ) + if ( + getattr(job, 'log', False) + and log_fpath is not None + and log_fpath.exists() + ): + rich_print(f' [red]{job.name}[/red] log: {log_fpath}') else: any_log_missing = True rich_print(f' [red]{job.name}[/red] [dim](no log)[/dim]') @@ -759,9 +822,7 @@ def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: for job in skipped_jobs: reason = self._skip_reason(job, status_by_name) if reason: - rich_print( - f' [yellow]{job.name}[/yellow] ({reason})' - ) + rich_print(f' [yellow]{job.name}[/yellow] ({reason})') else: rich_print(f' 
[yellow]{job.name}[/yellow]') @@ -781,6 +842,7 @@ def _dispatch_monitor( ) if monitor == 'none': from rich import print as rich_print + rich_print( '[bold]Queue running detached.[/bold] ' f'Reattach with: cmd_queue monitor --manifest={manifest_path}' @@ -791,9 +853,11 @@ def _dispatch_monitor( if monitor == 'tmux': if not ub.find_exe('tmux'): import warnings + warnings.warn( "monitor='tmux' requested but tmux not found; " - "falling back to inline monitor.") + 'falling back to inline monitor.' + ) return self.monitor( with_textual=with_textual, onfail=onfail, @@ -806,6 +870,7 @@ def _dispatch_monitor( extra_args.append(f'--onexit={onexit}') session_name = f'cmdq-monitor-{self.pathid}' from rich import print as rich_print + rich_print( f'[bold]Launching monitor in tmux session[/bold] {session_name}' ) @@ -816,11 +881,13 @@ def _dispatch_monitor( verbose=0, extra_args=extra_args, ) + # Don't pull the user's terminal into the monitor session; let # them attach on demand and freely detach back to this shell. def _is_finished() -> bool: _, finished, _ = self._build_status_table() return finished + tmux.block_with_attach_prompt( session_name=session_name, is_finished_fn=_is_finished, @@ -841,6 +908,7 @@ def _headless_block_until_done(self, refresh_rate: float = 1.0) -> Any: running elsewhere (in a tmux session, or not at all). 
""" import time + while True: table, finished, agg_state = self._build_status_table() if finished: @@ -988,6 +1056,7 @@ def _textual_monitor(self): is_running = False else: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() @@ -1061,15 +1130,20 @@ def _build_failed_jobs_renderable(self) -> Any: if failed: ftable = Table( - title='Failed jobs', title_style='bold red', - show_header=True, header_style='red', + title='Failed jobs', + title_style='bold red', + show_header=True, + header_style='red', ) ftable.add_column('name', style='red') ftable.add_column('log') for job in failed: log_fpath = getattr(job, 'log_fpath', None) - if (getattr(job, 'log', False) and log_fpath is not None - and log_fpath.exists()): + if ( + getattr(job, 'log', False) + and log_fpath is not None + and log_fpath.exists() + ): ftable.add_row(job.name, str(log_fpath)) else: any_log_missing = True @@ -1078,8 +1152,10 @@ def _build_failed_jobs_renderable(self) -> Any: if skipped: stable = Table( - title='Skipped jobs', title_style='bold yellow', - show_header=True, header_style='yellow', + title='Skipped jobs', + title_style='bold yellow', + show_header=True, + header_style='yellow', ) stable.add_column('name', style='yellow') stable.add_column('reason') @@ -1089,11 +1165,13 @@ def _build_failed_jobs_renderable(self) -> Any: renderables.append(stable) if any_log_missing: - renderables.append(Text( - 'Note: failure logs are not enabled for some failed ' - 'jobs (pass log=True at submit time).', - style='yellow', - )) + renderables.append( + Text( + 'Note: failure logs are not enabled for some failed ' + 'jobs (pass log=True at submit time).', + style='yellow', + ) + ) if len(renderables) == 1: return renderables[0] @@ -1101,6 +1179,7 @@ def _build_failed_jobs_renderable(self) -> Any: def _build_live_renderable(self): from rich.console import Group + table, finished, agg_state = self._build_status_table() failed = 
self._build_failed_jobs_renderable() renderable = Group(table, failed) if failed is not None else table @@ -1109,6 +1188,7 @@ def _build_live_renderable(self): def _simple_rich_monitor(self, refresh_rate=0.4): import time from rich.live import Live + if 0: print('Kill commands:') for command in self._kill_commands(): @@ -1124,15 +1204,24 @@ def _simple_rich_monitor(self, refresh_rate=0.4): live.update(renderable) except KeyboardInterrupt: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() def _build_status_table(self): from rich.table import Table + # https://rich.readthedocs.io/en/stable/live.html table = Table() - columns = ['tmux session name', 'status', 'passed', 'failed', 'skipped', 'total'] + columns = [ + 'tmux session name', + 'status', + 'passed', + 'failed', + 'skipped', + 'total', + ] for col in columns: table.add_column(col) @@ -1143,7 +1232,7 @@ def _build_status_table(self): 'failed': 0, 'passed': 0, 'skipped': 0, - 'total': 0 + 'total': 0, } for worker in self.workers: @@ -1155,12 +1244,12 @@ def _build_status_table(self): finished = False pass_color = '[yellow]' else: - finished &= (state['status'] == 'done') + finished &= state['status'] == 'done' if state['status'] == 'done': pass_color = '[green]' - if (state['failed'] > 0): + if state['failed'] > 0: fail_color = '[red]' - if (state['skipped'] > 0): + if state['skipped'] > 0: skip_color = '[yellow]' agg_state['total'] += state['total'] @@ -1171,10 +1260,10 @@ def _build_status_table(self): table.add_row( state['name'], state['status'], - f"{pass_color}{state['passed']}", - f"{fail_color}{state['failed']}", - f"{skip_color}{state['skipped']}", - f"{state['total']}", + f'{pass_color}{state["passed"]}', + f'{fail_color}{state["failed"]}', + f'{skip_color}{state["skipped"]}', + f'{state["total"]}', ) if not finished: @@ -1186,10 +1275,10 @@ def _build_status_table(self): table.add_row( agg_state['name'], agg_state['status'], - 
f"{agg_state['passed']}", - f"{agg_state['failed']}", - f"{agg_state['skipped']}", - f"{agg_state['total']}", + f'{agg_state["passed"]}', + f'{agg_state["failed"]}', + f'{agg_state["skipped"]}', + f'{agg_state["total"]}', ) return table, finished, agg_state @@ -1250,7 +1339,9 @@ def current_output(self) -> None: for queue in self.workers: print('\n\nqueue = {!r}'.format(queue)) # First print out the contents for debug - tmux.capture_pane(target_session=queue.pathid, verbose=self.cmd_verbose) + tmux.capture_pane( + target_session=queue.pathid, verbose=self.cmd_verbose + ) def _print_commands(self): # First print out the contents for debug @@ -1287,27 +1378,32 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: log_fpath = getattr(job, 'log_fpath', None) depends = getattr(job, 'depends', None) or [] depends_names = [ - getattr(d, 'name', None) for d in depends + getattr(d, 'name', None) + for d in depends if d is not None and getattr(d, 'name', None) ] - jobs_info.append({ - 'name': getattr(job, 'name', None), - 'log': bool(getattr(job, 'log', False)), - 'fail_fpath': str(fail_fpath) if fail_fpath else None, - 'skip_fpath': str(skip_fpath) if skip_fpath else None, - 'log_fpath': str(log_fpath) if log_fpath else None, - 'depends': depends_names, - }) - workers_info.append({ - 'name': worker.name, - 'rootid': worker.rootid, - 'dpath': str(worker.dpath), - 'pathid': worker.pathid, - 'state_fpath': str(worker.state_fpath), - 'fpath': str(worker.fpath), - 'environ': dict(worker.environ or {}), - 'jobs': jobs_info, - }) + jobs_info.append( + { + 'name': getattr(job, 'name', None), + 'log': bool(getattr(job, 'log', False)), + 'fail_fpath': str(fail_fpath) if fail_fpath else None, + 'skip_fpath': str(skip_fpath) if skip_fpath else None, + 'log_fpath': str(log_fpath) if log_fpath else None, + 'depends': depends_names, + } + ) + workers_info.append( + { + 'name': worker.name, + 'rootid': worker.rootid, + 'dpath': str(worker.dpath), + 'pathid': worker.pathid, + 
'state_fpath': str(worker.state_fpath), + 'fpath': str(worker.fpath), + 'environ': dict(worker.environ or {}), + 'jobs': jobs_info, + } + ) return { 'backend': 'tmux', 'name': self.name, @@ -1324,6 +1420,7 @@ def _build_monitor_manifest(self) -> Dict[str, Any]: def _write_monitor_manifest(self) -> Any: """Persist the monitor manifest to ``/monitor_manifest.json``.""" from cmd_queue import monitor_manifest as mm + path = mm.manifest_path_for_dpath(self.dpath) manifest = self._build_monitor_manifest() mm.write_manifest(manifest, path) @@ -1331,14 +1428,16 @@ def _write_monitor_manifest(self) -> Any: return path @classmethod - def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": + def _from_manifest(cls, manifest: Dict[str, Any]) -> 'TMUXMultiQueue': """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" self = cls.__new__(cls) # Initialize the base Queue state without re-creating workers / dpaths. base_queue.Queue.__init__(self) self.name = manifest['name'] self.rootid = manifest['rootid'] - self.pathid = manifest.get('pathid', '{}_{}'.format(self.name, self.rootid)) + self.pathid = manifest.get( + 'pathid', '{}_{}'.format(self.name, self.rootid) + ) self.dpath = ub.Path(manifest['dpath']) self.fpath = ub.Path(manifest['fpath']) self.size = manifest['size'] @@ -1350,6 +1449,7 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": self.preamble = [] self.jobs = [] import types + workers = [] for w in manifest.get('workers', []): worker = serial_queue.SerialQueue( @@ -1363,14 +1463,22 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": # the attributes the failed-jobs renderer reads. 
stubs = [] for j in w.get('jobs') or []: - stubs.append(types.SimpleNamespace( - name=j.get('name'), - log=bool(j.get('log', False)), - fail_fpath=ub.Path(j['fail_fpath']) if j.get('fail_fpath') else None, - skip_fpath=ub.Path(j['skip_fpath']) if j.get('skip_fpath') else None, - log_fpath=ub.Path(j['log_fpath']) if j.get('log_fpath') else None, - depends=list(j.get('depends') or []), - )) + stubs.append( + types.SimpleNamespace( + name=j.get('name'), + log=bool(j.get('log', False)), + fail_fpath=ub.Path(j['fail_fpath']) + if j.get('fail_fpath') + else None, + skip_fpath=ub.Path(j['skip_fpath']) + if j.get('skip_fpath') + else None, + log_fpath=ub.Path(j['log_fpath']) + if j.get('log_fpath') + else None, + depends=list(j.get('depends') or []), + ) + ) worker.jobs = stubs workers.append(worker) self.workers = workers @@ -1379,6 +1487,7 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> "TMUXMultiQueue": def has_stdin() -> bool: import sys + try: sys.stdin.fileno() except Exception: @@ -1390,6 +1499,7 @@ def has_stdin() -> bool: try: import textual # NOQA from cmd_queue.monitor_app import CmdQueueMonitorApp + if not hasattr(CmdQueueMonitorApp, 'run'): raise ImportError('Current textual monitor is broken on new versions') except ImportError: diff --git a/cmd_queue/util/__init__.py b/cmd_queue/util/__init__.py index 464dec2..ea91353 100644 --- a/cmd_queue/util/__init__.py +++ b/cmd_queue/util/__init__.py @@ -1,29 +1,32 @@ - def lazy_import(module_name, submodules, submod_attrs): import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name): if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( 
'{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -51,4 +54,5 @@ def __getattr__(name): def __dir__(): return __all__ + __all__ = ['textual_extensions', 'util_algo', 'util_networkx'] diff --git a/cmd_queue/util/richer.py b/cmd_queue/util/richer.py index 10b0d2d..915fbc9 100644 --- a/cmd_queue/util/richer.py +++ b/cmd_queue/util/richer.py @@ -23,28 +23,32 @@ def lazy_import( ) -> Callable[[str], Any]: import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name: str) -> Any: if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( '{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -125,14 +129,66 @@ def __getattr__(name: str) -> Any: def __dir__() -> list[str]: return __all__ -__all__: list[str] = ['abc', 'align', 'ansi', 'bar', 'box', 'cells', 'color', - 'color_triplet', 'columns', 'console', 'constrain', 'containers', - 'control', 'default_styles', 'diagnose', 'emoji', 'errors', - 'file_proxy', 'filesize', 'get_console', 'highlighter', 'inspect', - 'json', 'jupyter', 'layout', 'live', 
'live_render', 'logging', - 'markdown', 'markup', 'measure', 'padding', 'pager', 'palette', - 'panel', 'pretty', 'print', 'progress', 'progress_bar', 'prompt', - 'protocol', 'reconfigure', 'region', 'repr', 'rule', 'scope', - 'screen', 'segment', 'spinner', 'status', 'style', 'styled', - 'syntax', 'table', 'terminal_theme', 'text', 'theme', 'themes', - 'traceback', 'tree'] + +__all__: list[str] = [ + 'abc', + 'align', + 'ansi', + 'bar', + 'box', + 'cells', + 'color', + 'color_triplet', + 'columns', + 'console', + 'constrain', + 'containers', + 'control', + 'default_styles', + 'diagnose', + 'emoji', + 'errors', + 'file_proxy', + 'filesize', + 'get_console', + 'highlighter', + 'inspect', + 'json', + 'jupyter', + 'layout', + 'live', + 'live_render', + 'logging', + 'markdown', + 'markup', + 'measure', + 'padding', + 'pager', + 'palette', + 'panel', + 'pretty', + 'print', + 'progress', + 'progress_bar', + 'prompt', + 'protocol', + 'reconfigure', + 'region', + 'repr', + 'rule', + 'scope', + 'screen', + 'segment', + 'spinner', + 'status', + 'style', + 'styled', + 'syntax', + 'table', + 'terminal_theme', + 'text', + 'theme', + 'themes', + 'traceback', + 'tree', +] diff --git a/cmd_queue/util/texter.py b/cmd_queue/util/texter.py index 0180539..aa6dff4 100644 --- a/cmd_queue/util/texter.py +++ b/cmd_queue/util/texter.py @@ -22,28 +22,32 @@ def lazy_import( ) -> Callable[[str], Any]: import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name: str) -> Any: if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( '{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, 
submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -92,9 +96,30 @@ def __getattr__(name: str) -> Any: def __dir__() -> list[str]: return __all__ + __all__: list[str] = [ - 'actions', 'app', 'background', 'binding', 'case', 'driver', - 'drivers', 'events', 'geometry', 'keys', 'layout', 'layout_map', - 'layouts', 'message', 'message_pump', 'messages', 'page', - 'reactive', 'screen_update', 'scrollbar', 'view', 'views', 'widget', - 'widgets'] + 'actions', + 'app', + 'background', + 'binding', + 'case', + 'driver', + 'drivers', + 'events', + 'geometry', + 'keys', + 'layout', + 'layout_map', + 'layouts', + 'message', + 'message_pump', + 'messages', + 'page', + 'reactive', + 'screen_update', + 'scrollbar', + 'view', + 'views', + 'widget', + 'widgets', +] diff --git a/cmd_queue/util/textual_extensions.py b/cmd_queue/util/textual_extensions.py index 59a245c..2ec20b3 100644 --- a/cmd_queue/util/textual_extensions.py +++ b/cmd_queue/util/textual_extensions.py @@ -8,6 +8,7 @@ try: from textual.app import App + # from textual.driver import Driver # from typing import Type # from rich.console import Console @@ -47,8 +48,11 @@ class class_or_instancemethod(classmethod): >>> print(X().foo()) bound to the instance """ + def __get__(self, instance: Any, type_: Any) -> Any: # type: ignore - descr_get = super().__get__ if instance is None else self.__func__.__get__ # type: ignore + descr_get = ( + super().__get__ if instance is None else self.__func__.__get__ # type: ignore + ) return descr_get(instance, type_) @@ -95,6 +99,7 @@ def _run_as_cls( """ Original classmethod logic """ + async def run_app() -> None: app = cls(screen=screen, driver_class=driver, **kwargs) await app.process_messages() @@ -117,16 +122,19 @@ def _run_as_instance( if kwargs.get('title', None) is not None: 
self._title = kwargs.pop('title') if kwargs.get('log', None) is not None: - self.log_file = open(kwargs.pop('log'), "wt") + self.log_file = open(kwargs.pop('log'), 'wt') if kwargs.get('log_verbosity', None) is not None: self.log_verbosity = kwargs.pop('log_verbosity') if len(kwargs): raise ValueError( 'Cannot pass unhandled kwargs when running as an ' 'instance method. Assuming that instance variables ' - 'are already setup.') + 'are already setup.' + ) + async def run_app() -> None: await self.process_messages() + asyncio.run(run_app()) # Allow for use of run as a instance or classmethod @@ -146,23 +154,22 @@ def run( """ if isinstance(cls_or_self, type): # Running as a class method - cls_or_self._run_as_cls( - screen=screen, driver=driver, **kwargs) + cls_or_self._run_as_cls(screen=screen, driver=driver, **kwargs) else: # Running as an instance method - cls_or_self._run_as_instance( - screen=screen, driver=driver, **kwargs) + cls_or_self._run_as_instance(screen=screen, driver=driver, **kwargs) try: + class ExtHeader(Widget): # type: ignore - """ - """ + """ """ + def __init__( self, *, tall: bool = True, - style: str = "white on dark_green", + style: str = 'white on dark_green', clock: bool = True, ) -> None: """ @@ -175,14 +182,18 @@ def __init__( self.clock = clock tall: Reactive[bool] = Reactive(True, layout=True) - style: Reactive[StyleType] = Reactive("white on blue") + style: Reactive[StyleType] = Reactive('white on blue') clock: Reactive[bool] = Reactive(True) - title: Reactive[str] = Reactive("") - sub_title: Reactive[str] = Reactive("") + title: Reactive[str] = Reactive('') + sub_title: Reactive[str] = Reactive('') @property def full_title(self) -> str: - return f"{self.title} - {self.sub_title}" if self.sub_title else self.title + return ( + f'{self.title} - {self.sub_title}' + if self.sub_title + else self.title + ) def __rich_repr__(self) -> Result: yield self.title @@ -191,19 +202,23 @@ async def watch_tall(self, tall: bool) -> None: self.layout_size 
= 3 if tall else 1 def get_clock(self) -> str: - return datetime.now().time().strftime("%X") + return datetime.now().time().strftime('%X') def render(self) -> RenderableType: header_table = Table.grid(padding=(0, 1), expand=True) header_table.style = self.style - header_table.add_column(justify="left", ratio=0, width=8) - header_table.add_column("title", justify="center", ratio=1) - header_table.add_column("clock", justify="right", width=8) + header_table.add_column(justify='left', ratio=0, width=8) + header_table.add_column('title', justify='center', ratio=1) + header_table.add_column('clock', justify='right', width=8) header_table.add_row( - "⚡", self.full_title, self.get_clock() if self.clock else "" + '⚡', self.full_title, self.get_clock() if self.clock else '' ) header: RenderableType - header = Panel(header_table, style=self.style) if self.tall else header_table + header = ( + Panel(header_table, style=self.style) + if self.tall + else header_table + ) return header async def on_mount(self, event: Any) -> None: @@ -219,8 +234,8 @@ async def set_title(title: str) -> None: async def set_sub_title(sub_title: str) -> None: self.sub_title = sub_title - watch(self.app, "title", set_title) - watch(self.app, "sub_title", set_sub_title) + watch(self.app, 'title', set_title) + watch(self.app, 'sub_title', set_sub_title) async def on_click(self, event: Any) -> None: """ diff --git a/cmd_queue/util/util_algo.py b/cmd_queue/util/util_algo.py index 4637fd7..d5bdf6b 100644 --- a/cmd_queue/util/util_algo.py +++ b/cmd_queue/util/util_algo.py @@ -6,7 +6,9 @@ import numpy as np -def balanced_number_partitioning(items: np.ndarray | Sequence, num_parts: int) -> List[np.ndarray]: +def balanced_number_partitioning( + items: np.ndarray | Sequence, num_parts: int +) -> List[np.ndarray]: """ Greedy approximation to multiway number partitioning diff --git a/cmd_queue/util/util_bash.py b/cmd_queue/util/util_bash.py index 0757bf7..3f0e497 100644 --- a/cmd_queue/util/util_bash.py +++ 
b/cmd_queue/util/util_bash.py @@ -38,15 +38,11 @@ def bash_json_dump(json_fmt_parts, fpath): \ > out.json """ - printf_body_parts = [ - '"{}": {}'.format(k, f) for k, f, v in json_fmt_parts - ] - printf_arg_parts = [ - '"{}"'.format(v) for k, f, v in json_fmt_parts - ] - printf_body = r"'{" + ", ".join(printf_body_parts) + r"}\n'" + printf_body_parts = ['"{}": {}'.format(k, f) for k, f, v in json_fmt_parts] + printf_arg_parts = ['"{}"'.format(v) for k, f, v in json_fmt_parts] + printf_body = r"'{" + ', '.join(printf_body_parts) + r"}\n'" printf_args = ' '.join(printf_arg_parts) redirect_part = '> ' + str(fpath) - printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args + printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args dump_code = printf_part + ' \\\n ' + redirect_part return dump_code diff --git a/cmd_queue/util/util_tags.py b/cmd_queue/util/util_tags.py index baf58d0..8a0f459 100644 --- a/cmd_queue/util/util_tags.py +++ b/cmd_queue/util/util_tags.py @@ -30,6 +30,7 @@ def coerce( def intersection(self, other: Optional[Iterable[str]]) -> Optional[Tags]: import ubelt as ub + if other is None: return None isect = self.__class__(ub.oset(self) & set(other)) diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index 4ce765b..92c341c 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -28,10 +28,7 @@ def list_sessions() -> List[Dict[str, str]]: line = line.strip() if line: session_id, rest = line.split(':', 1) - sessions.append({ - 'id': session_id, - 'rest': rest - }) + sessions.append({'id': session_id, 'rest': rest}) return sessions @staticmethod @@ -45,11 +42,15 @@ def _capture_pane_command(target_session: str) -> str: @staticmethod def capture_pane(target_session: str, verbose: int = 3) -> Any: - return ub.cmd(tmux._capture_pane_command(target_session), verbose=verbose) + return ub.cmd( + tmux._capture_pane_command(target_session), verbose=verbose + ) @staticmethod def kill_session(target_session: str, 
verbose: int = 3) -> Any: - return ub.cmd(tmux._kill_session_command(target_session), verbose=verbose) + return ub.cmd( + tmux._kill_session_command(target_session), verbose=verbose + ) @staticmethod def kill_pane(pane_id: str, verbose: int = 3) -> Any: @@ -59,6 +60,7 @@ def kill_pane(pane_id: str, verbose: int = 3) -> Any: def is_inside() -> bool: """True if the current process is running inside a tmux session.""" import os + return bool(os.environ.get('TMUX')) @staticmethod @@ -83,6 +85,7 @@ def spawn_monitor_session( import os import shlex import sys + if not ub.find_exe('tmux'): raise RuntimeError('tmux is not available') @@ -90,7 +93,10 @@ def spawn_monitor_session( # globally-installed older ``cmd_queue`` binary on PATH would not # know about the monitor subcommand. cmd_parts = [ - sys.executable, '-m', 'cmd_queue', 'monitor', + sys.executable, + '-m', + 'cmd_queue', + 'monitor', '--manifest=' + str(manifest_path), ] if extra_args: @@ -101,8 +107,14 @@ def spawn_monitor_session( inner = ' '.join(shlex.quote(p) for p in cmd_parts) bash_payload = f'{inner}; exec bash' new_session_cmd = [ - 'tmux', 'new-session', '-d', '-s', session_name, - 'bash', '-lc', bash_payload, + 'tmux', + 'new-session', + '-d', + '-s', + session_name, + 'bash', + '-lc', + bash_payload, ] ub.cmd(new_session_cmd, verbose=verbose, check=True) @@ -115,14 +127,20 @@ def spawn_monitor_session( if inside: # Switching the current client is the in-tmux equivalent of # attach; spawning a nested attach is rejected by tmux. - ub.cmd(['tmux', 'switch-client', '-t', session_name], - verbose=verbose, check=True) + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + verbose=verbose, + check=True, + ) info['attached_via'] = 'switch-client' else: # ``attach-session`` is interactive, so let the foreground # process inherit the tty. 
- ub.cmd(['tmux', 'attach-session', '-t', session_name], - verbose=verbose, check=False) + ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + verbose=verbose, + check=False, + ) info['attached_via'] = 'attach-session' return info @@ -164,13 +182,19 @@ def block_with_attach_prompt( inside_tmux = bool(os.environ.get('TMUX')) attach_cmd = ( - f'tmux switch-client -t {session_name}' if inside_tmux + f'tmux switch-client -t {session_name}' + if inside_tmux else f'tmux attach -t {session_name}' ) print(f'Watching {label}.') import rich - rich.print(rf'[bold]Press \[a][/bold] to attach to monitor session ({session_name})') - rich.print(r'[bold]Press \[q][/bold] to stop watching (queue keeps running).') + + rich.print( + rf'[bold]Press \[a][/bold] to attach to monitor session ({session_name})' + ) + rich.print( + r'[bold]Press \[q][/bold] to stop watching (queue keeps running).' + ) print(f'Manual reattach anytime from another shell:\n{attach_cmd}') fd = sys.stdin.fileno() @@ -189,11 +213,15 @@ def block_with_attach_prompt( termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) try: if inside_tmux: - ub.cmd(['tmux', 'switch-client', '-t', - session_name], check=False) + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + check=False, + ) else: - ub.cmd(['tmux', 'attach-session', '-t', - session_name], check=False) + ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + check=False, + ) finally: # Re-enter cbreak when the user detaches back. 
tty.setcbreak(fd) @@ -219,50 +247,60 @@ def list_panes(target_session: str) -> List[Dict[str, str]]: print(f'rows = {ub.urepr(rows, nl=1)}') """ import json + # References: # https://github.com/tmux-python/libtmux/blob/f705713c7aff1b14e8f8f3ca53d1b0b6ba6e98d0/src/libtmux/formats.py#L80 PANE_FORMATS = [ - "pane_id", - "pane_index", - "pane_pid", - - "pane_active", - "pane_dead", - "pane_in_mode", - "pane_synchronized", - "pane_tty", - "pane_start_command", - "pane_start_path", - "pane_current_path", - "pane_current_command", - "cursor_x", - "cursor_y", - "scroll_region_upper", - "scroll_region_lower", - "saved_cursor_x", - "saved_cursor_y", - "alternate_on", - "alternate_saved_x", - "alternate_saved_y", - "cursor_flag", - "insert_flag", - "keypad_cursor_flag", - "keypad_flag", - "wrap_flag", - "mouse_standard_flag", - "mouse_button_flag", - "mouse_any_flag", - "mouse_utf8_flag", - "history_size", - "history_limit", - "history_bytes", - "pane_width", - "pane_height", + 'pane_id', + 'pane_index', + 'pane_pid', + 'pane_active', + 'pane_dead', + 'pane_in_mode', + 'pane_synchronized', + 'pane_tty', + 'pane_start_command', + 'pane_start_path', + 'pane_current_path', + 'pane_current_command', + 'cursor_x', + 'cursor_y', + 'scroll_region_upper', + 'scroll_region_lower', + 'saved_cursor_x', + 'saved_cursor_y', + 'alternate_on', + 'alternate_saved_x', + 'alternate_saved_y', + 'cursor_flag', + 'insert_flag', + 'keypad_cursor_flag', + 'keypad_flag', + 'wrap_flag', + 'mouse_standard_flag', + 'mouse_button_flag', + 'mouse_any_flag', + 'mouse_utf8_flag', + 'history_size', + 'history_limit', + 'history_bytes', + 'pane_width', + 'pane_height', # "pane_title", # removed in 3.1+ ] format_code = json.dumps({k: '#{' + k + '}' for k in PANE_FORMATS}) rows = [] - out: Any = ub.cmd(['tmux', 'list-panes', '-t', str(target_session), '-F', format_code], verbose=0) + out: Any = ub.cmd( + [ + 'tmux', + 'list-panes', + '-t', + str(target_session), + '-F', + format_code, + ], + verbose=0, + ) for 
line in out.stdout.strip().split('\n'): row = json.loads(line) rows.append(row) diff --git a/cmd_queue/util/util_yaml.py b/cmd_queue/util/util_yaml.py index e229a61..237151d 100644 --- a/cmd_queue/util/util_yaml.py +++ b/cmd_queue/util/util_yaml.py @@ -7,14 +7,15 @@ class _YamlRepresenter: - @staticmethod def str_presenter(dumper, data): # https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data if len(data.splitlines()) > 1 or '\n' in data: text_list = [line.rstrip() for line in data.splitlines()] fixed_data = '\n'.join(text_list) - return dumper.represent_scalar('tag:yaml.org,2002:str', fixed_data, style='|') + return dumper.represent_scalar( + 'tag:yaml.org,2002:str', fixed_data, style='|' + ) return dumper.represent_scalar('tag:yaml.org,2002:str', data) @@ -29,6 +30,7 @@ def _custom_ruaml_loader(): https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0 """ import ruamel.yaml + Loader = ruamel.yaml.RoundTripLoader def _construct_include_tag(self, node): @@ -38,10 +40,13 @@ def _construct_include_tag(self, node): else: external_fpath = ub.Path(node.value) if not external_fpath.exists(): - raise IOError(f'Included external yaml file {external_fpath} ' - 'does not exist') + raise IOError( + f'Included external yaml file {external_fpath} ' + 'does not exist' + ) return Yaml.load(node.value) - Loader.add_constructor("!include", _construct_include_tag) + + Loader.add_constructor('!include', _construct_include_tag) return Loader @@ -52,6 +57,7 @@ def _custom_ruaml_dumper(): https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags """ import ruamel.yaml + Dumper = ruamel.yaml.RoundTripDumper Dumper.add_representer(str, _YamlRepresenter.str_presenter) Dumper.add_representer(ub.udict, Dumper.represent_dict) @@ -64,6 +70,7 @@ def _custom_pyaml_dumper(): class Dumper(yaml.Dumper): pass + # dumper = yaml.dumper.Dumper # dumper = 
yaml.SafeDumper(sort_keys=False) # yaml.dump(data, s, Dumper=yaml.SafeDumper, sort_keys=False, width=float("inf")) @@ -102,6 +109,7 @@ def _custom_new_ruaml_yaml_obj(): >>> print(file.getvalue()) """ import ruamel.yaml + # make a new instance, although you could get the YAML # instance from the constructor argument class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): # type: ignore @@ -111,7 +119,9 @@ class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): # type: ... CustomRepresenter.add_representer(str, _YamlRepresenter.str_presenter) - CustomRepresenter.add_representer(ub.udict, CustomRepresenter.represent_dict) + CustomRepresenter.add_representer( + ub.udict, CustomRepresenter.represent_dict + ) def _construct_include_tag(self, node): print(f'node={node}') @@ -122,14 +132,17 @@ def _construct_include_tag(self, node): else: external_fpath = ub.Path(value) if not external_fpath.exists(): - raise IOError(f'Included external yaml file {external_fpath} ' - 'does not exist') + raise IOError( + f'Included external yaml file {external_fpath} ' + 'does not exist' + ) # Not sure why we can't recurse here... 
# yaml_obj # print(f'yaml_obj={yaml_obj}') # import xdev # xdev.embed() return Yaml.load(value) + # Loader = ruamel.yaml.RoundTripLoader # Loader.add_constructor("!include", _construct_include_tag) @@ -180,12 +193,18 @@ def dumps(data, backend='ruamel'): yaml_obj.dump(data, file) else: import ruamel.yaml + Dumper = _custom_ruaml_dumper() - ruamel.yaml.round_trip_dump(data, file, Dumper=Dumper, width=float("inf")) + ruamel.yaml.round_trip_dump( + data, file, Dumper=Dumper, width=float('inf') + ) elif backend == 'pyyaml': import yaml + Dumper = _custom_pyaml_dumper() - yaml.dump(data, file, Dumper=Dumper, sort_keys=False, width=float("inf")) + yaml.dump( + data, file, Dumper=Dumper, sort_keys=False, width=float('inf') + ) else: raise KeyError(backend) text = file.getvalue() @@ -227,6 +246,7 @@ def load(file, backend='ruamel'): else: if backend == 'ruamel': import ruamel.yaml # NOQA + # TODO: seems like there will be a deprecation # from ruamel.yaml import YAML if NEW_RUAMEL: @@ -236,10 +256,13 @@ def load(file, backend='ruamel'): # yaml = YAML(typ='unsafe', pure=True) # data = yaml.load(file, Loader=Loader, preserve_quotes=True) Loader = _custom_ruaml_loader() - data = ruamel.yaml.load(file, Loader=Loader, preserve_quotes=True) + data = ruamel.yaml.load( + file, Loader=Loader, preserve_quotes=True + ) # data = ruamel.yaml.load(file, Loader=ruamel.yaml.RoundTripLoader, preserve_quotes=True) elif backend == 'pyyaml': import yaml + # data = yaml.load(file, Loader=yaml.SafeLoader) data = yaml.load(file, Loader=yaml.Loader) else: @@ -389,6 +412,7 @@ def InlineList(items): .. 
[SO56937691] https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline """ import ruamel.yaml + ret = ruamel.yaml.comments.CommentedSeq(items) # type: ignore ret.fa.set_flow_style() return ret @@ -408,10 +432,12 @@ def Dict(data): >>> print(Yaml.dumps(data)) """ import ruamel.yaml + ret = ruamel.yaml.comments.CommentedMap(data) # type: ignore return ret @staticmethod def CodeBlock(text): import ruamel.yaml + return ruamel.yaml.scalarstring.LiteralScalarString(ub.codeblock(text)) # type: ignore diff --git a/dev/_devcheck_rich.py b/dev/_devcheck_rich.py index 8a5127f..9694c09 100644 --- a/dev/_devcheck_rich.py +++ b/dev/_devcheck_rich.py @@ -5,6 +5,7 @@ Cant do this with pure rich https://github.com/Textualize/rich/issues/2120 """ + from rich.table import Table from rich.live import Live import time @@ -12,6 +13,7 @@ def random_rich_table(): import random + r = random.random() columns = ['name', 'status', 'finished', 'errors', 'total'] table = Table() @@ -42,6 +44,7 @@ def simple_update_no_pager(): def simple_pager_no_update(): from rich.console import Console + console = Console() table = random_rich_table() with console.pager(): @@ -66,10 +69,9 @@ class MyApp(App): """An example of a very simple Textual App""" async def on_load(self, event: events.Load) -> None: - await self.bind("q", "quit", "Quit") + await self.bind('q', 'quit', 'Quit') async def on_mount(self, event: events.Mount) -> None: - self.body = body = ScrollView(auto_width=True) await self.view.dock(body) @@ -80,7 +82,7 @@ async def add_content(): await self.call_later(add_content) - MyApp.run(title="Simple App", log="textual.log") + MyApp.run(title='Simple App', log='textual.log') if __name__ == '__main__': diff --git a/examples/slurm_example.py b/examples/slurm_example.py index db5f9ef..1d0edf4 100644 --- a/examples/slurm_example.py +++ b/examples/slurm_example.py @@ -1,25 +1,35 @@ - - def main(): import cmd_queue import ubelt as ub - queue = 
cmd_queue.Queue.create(backend='slurm', partition='project123', - account='user123', ntasks=1) - job1 = queue.submit(ub.codeblock( - ''' + queue = cmd_queue.Queue.create( + backend='slurm', partition='project123', account='user123', ntasks=1 + ) + + job1 = queue.submit( + ub.codeblock( + """ command1 --input=foo.txt --output=bar.txt - ''')) + """ + ) + ) - job2 = queue.submit(ub.codeblock( - ''' + job2 = queue.submit( + ub.codeblock( + """ command2 --input=foo.txt --output=baz.txt - ''')) + """ + ) + ) - queue.submit(ub.codeblock( - ''' + queue.submit( + ub.codeblock( + """ command3 --input1=bar.txt --input2=baz.txt --output=buz.txt - '''), depends=[job2, job1]) + """ + ), + depends=[job2, job1], + ) queue.print_commands() diff --git a/examples/tmux_example.py b/examples/tmux_example.py index c5b5cb4..f87e6b4 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -43,6 +43,7 @@ # Force a clean run (no injected failures) python ~/code/cmd_queue/examples/tmux_example.py --failures=0 """ + import ubelt as ub import scriptconfig as scfg @@ -51,22 +52,41 @@ class TmuxExampleConfig(scfg.DataConfig): """ Automatically created module for IPython interactive environment """ - mode = scfg.Value('tmux', help='Where the monitor UI runs.', choices=['inline', 'tmux', 'none']) - name = scfg.Value('tmux-example', help=ub.paragraph( - ''' + + mode = scfg.Value( + 'tmux', + help='Where the monitor UI runs.', + choices=['inline', 'tmux', 'none'], + ) + name = scfg.Value( + 'tmux-example', + help=ub.paragraph( + """ Queue name; also doubles as the lookup key for `cmd_queue monitor `. - ''')) + """ + ), + ) workers = scfg.Value(4, type=int, help='Number of parallel tmux workers.') - failures = scfg.Value(6, type=int, help=ub.paragraph( - ''' + failures = scfg.Value( + 6, + type=int, + help=ub.paragraph( + """ Number of proc-* logical jobs to force into failure (0-4). The failures cascade: dependent merge/final jobs are skipped. 
- ''')) - logs = scfg.Value(True, isflag=True, help=ub.paragraph( - ''' + """ + ), + ) + logs = scfg.Value( + True, + isflag=True, + help=ub.paragraph( + """ Set to False to disable per-job log capture (default: enabled). - ''')) + """ + ), + ) def main(): @@ -81,7 +101,7 @@ def main(): ) proc_names = ['proc-A', 'proc-B', 'proc-C', 'proc-D'] - fail_set = set(proc_names[:max(0, min(args.failures, len(proc_names)))]) + fail_set = set(proc_names[: max(0, min(args.failures, len(proc_names)))]) submit_kw = {'log': args.logs} @@ -111,16 +131,10 @@ def submit_sleep_chain(base_name, total_sleep, depends=None, fail=False): name = f'{base_name}-{part:02d}' is_final_part = part == total_sleep - cmd = ( - f'echo "[{name}] start"; ' - f'sleep 1; ' - ) + cmd = f'echo "[{name}] start"; sleep 1; ' if is_final_part and fail: - cmd += ( - f'echo "[{base_name}] FORCED FAILURE" >&2; ' - f'exit 1' - ) + cmd += f'echo "[{base_name}] FORCED FAILURE" >&2; exit 1' elif is_final_part: cmd += f'echo "[{base_name}] done"' else: @@ -146,23 +160,24 @@ def submit_sleep_chain(base_name, total_sleep, depends=None, fail=False): # Level 2: each process job depends on exactly one prep job; some # may be forced to fail by --failures. proc_a = submit_sleep_chain( - 'proc-A', 3, depends=[prep_a], fail='proc-A' in fail_set) + 'proc-A', 3, depends=[prep_a], fail='proc-A' in fail_set + ) proc_b = submit_sleep_chain( - 'proc-B', 4, depends=[prep_b], fail='proc-B' in fail_set) + 'proc-B', 4, depends=[prep_b], fail='proc-B' in fail_set + ) proc_c = submit_sleep_chain( - 'proc-C', 5, depends=[prep_c], fail='proc-C' in fail_set) + 'proc-C', 5, depends=[prep_c], fail='proc-C' in fail_set + ) proc_d = submit_sleep_chain( - 'proc-D', 3, depends=[prep_d], fail='proc-D' in fail_set) + 'proc-D', 3, depends=[prep_d], fail='proc-D' in fail_set + ) # Level 3: two merge jobs, each waiting on a pair of proc jobs. 
- merge_x = submit_sleep_chain( - 'merge-X', 4, depends=[proc_a, proc_b]) - merge_y = submit_sleep_chain( - 'merge-Y', 3, depends=[proc_c, proc_d]) + merge_x = submit_sleep_chain('merge-X', 4, depends=[proc_a, proc_b]) + merge_y = submit_sleep_chain('merge-Y', 3, depends=[proc_c, proc_d]) # Level 4: single finalize job — the whole pipeline converges here. - submit_sleep_chain( - 'final', 2, depends=[merge_x, merge_y]) + submit_sleep_chain('final', 2, depends=[merge_x, merge_y]) queue.print_graph() diff --git a/run_tests.py b/run_tests.py index d55ab03..6f6fa46 100755 --- a/run_tests.py +++ b/run_tests.py @@ -2,16 +2,21 @@ if __name__ == '__main__': import pytest import sys + package_name = 'cmd_queue' mod_dpath = 'cmd_queue' test_dpath = 'tests' pytest_args = [ - '--cov-config', 'pyproject.toml', - '--cov-report', 'html', - '--cov-report', 'term', + '--cov-config', + 'pyproject.toml', + '--cov-report', + 'html', + '--cov-report', + 'term', '--xdoctest', '--cov=' + package_name, - mod_dpath, test_dpath + mod_dpath, + test_dpath, ] pytest_args = pytest_args + sys.argv[1:] sys.exit(pytest.main(pytest_args)) diff --git a/tests/test_airflow_queue.py b/tests/test_airflow_queue.py index 33ec628..cd31160 100644 --- a/tests/test_airflow_queue.py +++ b/tests/test_airflow_queue.py @@ -18,7 +18,9 @@ def _test_dpath(name: str) -> ub.Path: def _make_queue(name='cmdq_airflow_demo'): dpath = _test_dpath(name) airflow_home = dpath / 'airflow_home' - return AirflowQueue(name=name, dpath=dpath / 'queue_root', airflow_home=airflow_home) + return AirflowQueue( + name=name, dpath=dpath / 'queue_root', airflow_home=airflow_home + ) def test_finalize_text_contains_dependencies(): @@ -27,7 +29,7 @@ def test_finalize_text_contains_dependencies(): queue.submit('echo second', name='second_task', depends=first) text = queue.finalize_text() - assert "dag = DAG(" in text + assert 'dag = DAG(' in text assert "'finalize_demo'" in text assert "jobs['first_task']" in text assert 
"jobs['second_task']" in text @@ -37,8 +39,8 @@ def test_finalize_text_contains_dependencies(): def test_airflow_queue_run_executes_in_order(): queue = _make_queue(name='run_demo') outfile = queue.dpath / 'output.txt' - queue.submit(f"echo first >> {outfile}", name='first') - queue.submit(f"echo second >> {outfile}", name='second', depends='first') + queue.submit(f'echo first >> {outfile}', name='first') + queue.submit(f'echo second >> {outfile}', name='second', depends='first') queue.run() diff --git a/tests/test_bash_job_errors.py b/tests/test_bash_job_errors.py index 14ce711..ab3d747 100644 --- a/tests/test_bash_job_errors.py +++ b/tests/test_bash_job_errors.py @@ -2,9 +2,10 @@ def demo_script(dpath): - script_fpath = (dpath / 'myprog.py') - script_fpath.write_text(ub.codeblock( - ''' + script_fpath = dpath / 'myprog.py' + script_fpath.write_text( + ub.codeblock( + """ #!/usr/env/python def main(): @@ -30,27 +31,36 @@ def main(): if __name__ == '__main__': main() - ''')) + """ + ) + ) return script_fpath def test_bash_job_errors(): import ubelt as ub + dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_bash_job_errors') dpath.delete().ensuredir() from cmd_queue.serial_queue import BashJob + # Demo full boilerplate for a job with no dependencies import sys + sys.executable script_fpath = demo_script(dpath) pyexe = sys.executable - self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=True) + self = BashJob( + f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=True + ) self.print_commands(True, True) - self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=False) + self = BashJob( + f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=False + ) self.print_commands(True, True) @@ -58,6 +68,7 @@ def test_tmux_queue_errors(): import ubelt as ub import sys import cmd_queue + dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_tmux_queue_errors') dpath.delete().ensuredir() script_fpath = demo_script(dpath) @@ 
-66,16 +77,29 @@ def test_tmux_queue_errors(): log = True queue = cmd_queue.Queue.create(backend='tmux') - job1 = queue.submit(f'{pyexe} {script_fpath} --steps=3 --steptime=0.5', log=log) - job2 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5 --failflag', log=log, depends=job1) - job3 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log, depends=job2) - job4 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log) + job1 = queue.submit( + f'{pyexe} {script_fpath} --steps=3 --steptime=0.5', log=log + ) + job2 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5 --failflag', + log=log, + depends=job1, + ) + job3 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', + log=log, + depends=job2, + ) + job4 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log + ) # queue.submit(f'{pyexe} {script_fpath} --steps=2', log=log) queue.print_commands(True, True) queue.write() if not queue.is_available(): import pytest + pytest.skip('Skip tmux test. Tmux is not available') queue.run(block=0) diff --git a/tests/test_bash_variants.py b/tests/test_bash_variants.py index cc26956..8eabc3a 100644 --- a/tests/test_bash_variants.py +++ b/tests/test_bash_variants.py @@ -1,6 +1,7 @@ """ Tests for multiple variants of bash job text construction. """ + from cmd_queue.serial_queue import BashJob import subprocess import tempfile @@ -14,7 +15,7 @@ def test_primary_bash_job_text_variants(): that makes it easier to manually test common cases. 
""" main_variants = kwutil.Yaml.coerce( - ''' + """ - __testname__: plain jane cwd: False depends: False @@ -30,19 +31,22 @@ def test_primary_bash_job_text_variants(): log: True with_status: True with_gaurds: True - ''') + """ + ) dep = BashJob('echo hi', name='job1') for variant in main_variants: - job_kwargs = {} if variant['depends']: job_kwargs['depends'] = [dep] if variant['cwd']: job_kwargs['cwd'] = '/foo/bar' if variant['preamble']: - job_kwargs['preamble'] = ['export SETUP_LINE1=1', 'export SETUP_LINE2=2'] + job_kwargs['preamble'] = [ + 'export SETUP_LINE1=1', + 'export SETUP_LINE2=2', + ] finalize_kwargs = ub.udict(variant) & {'with_status', 'with_gaurds'} @@ -59,11 +63,14 @@ def test_primary_bash_job_text_variants(): if variant['__testname__'] == 'plain jane': assert text.strip() == command, ( - 'When there is nothing special, we just return the command as given') + 'When there is nothing special, we just return the command as given' + ) if variant['__testname__'] == 'the works': assert 'pushd "/foo/bar"' in text assert 'popd' in text - assert 'CHDIR_OK' in text, "cwd=True should define CHDIR_OK and guard popd" + assert 'CHDIR_OK' in text, ( + 'cwd=True should define CHDIR_OK and guard popd' + ) assert 'if [[ "$CHDIR_OK" == "1" ]]' in text or 'CHDIR_OK' in text assert 'export SETUP_LINE1=1' in text assert 'export SETUP_LINE2=2' in text @@ -72,27 +79,30 @@ def test_primary_bash_job_text_variants(): def test_bash_job_variants_syntax_grided(): basis = kwutil.Yaml.coerce( - ''' + """ cwd: [True, False] depends: [True, False] preamble: [True, False] log: [True, False] with_status: [True, False] with_gaurds: [True, False] - ''') + """ + ) grid_variants = list(ub.named_product(**basis)) dep = BashJob('echo hi', name='job1') for variant in grid_variants: - job_kwargs = {} if variant['depends']: job_kwargs['depends'] = [dep] if variant['cwd']: job_kwargs['cwd'] = '/foo/bar' if variant['preamble']: - job_kwargs['preamble'] = ['export SETUP_LINE1=1', 'export 
SETUP_LINE2=2'] + job_kwargs['preamble'] = [ + 'export SETUP_LINE1=1', + 'export SETUP_LINE2=2', + ] finalize_kwargs = ub.udict(variant) & {'with_status', 'with_gaurds'} @@ -116,12 +126,15 @@ def test_bash_job_variants_syntax_grided(): if proc.returncode == 0: print('Parse check is ok') else: - raise AssertionError(f"bash syntax error: \nSTDERR:\n{proc.stderr}\nSCRIPT:\n{text}") + raise AssertionError( + f'bash syntax error: \nSTDERR:\n{proc.stderr}\nSCRIPT:\n{text}' + ) # --- Plain-jane invariant: if nothing special, should equal command if not any(variant.values()): assert text.strip() == 'echo hi', ( - 'When there is nothing special, we just return the command as given') + 'When there is nothing special, we just return the command as given' + ) # --- Preamble should not be echoed if guards are on (i.e. set -x happens after preamble) if variant['preamble']: @@ -132,88 +145,141 @@ def test_bash_job_variants_syntax_grided(): pre_idx = text.find('export SETUP_LINE1=1') x_idx = text.find('set -x') assert pre_idx != -1 and x_idx != -1 and pre_idx < x_idx, ( - 'dont enable echo before preamble') + 'dont enable echo before preamble' + ) # --- Logging behavior if variant['log']: # When log is enabled, we expect tee + pipefail boilerplate - assert 'tee' in text, "log=True should use tee" + assert 'tee' in text, 'log=True should use tee' # Be strict if log_fpath is available on self; otherwise fall back to generic checks if hasattr(self, 'log_fpath'): - assert str(self.log_fpath) in text, "log=True should reference log_fpath" + assert str(self.log_fpath) in text, ( + 'log=True should reference log_fpath' + ) if variant['with_gaurds']: - assert 'set -o pipefail' in text, "log=True should enable pipefail" - assert 'set +o pipefail' in text, "log=True should restore pipefail" + assert 'set -o pipefail' in text, ( + 'log=True should enable pipefail' + ) + assert 'set +o pipefail' in text, ( + 'log=True should restore pipefail' + ) else: # When log is disabled, we should not 
see pipefail boilerplate - assert 'set -o pipefail' not in text, "log=False should not enable pipefail" - assert 'set +o pipefail' not in text, "log=False should not restore pipefail" + assert 'set -o pipefail' not in text, ( + 'log=False should not enable pipefail' + ) + assert 'set +o pipefail' not in text, ( + 'log=False should not restore pipefail' + ) # tee should not appear unless user command includes it (unlikely in these tests) # If you want to be strict: - assert 'tee ' not in text, "log=False should not insert tee" + assert 'tee ' not in text, 'log=False should not insert tee' # --- Guard behavior: when with_gaurds is enabled, we expect set +e and the brace return-code capture if variant['with_gaurds']: - assert 'set +e' in text, "with_gaurds=True should disable exit-on-error" + assert 'set +e' in text, ( + 'with_gaurds=True should disable exit-on-error' + ) # We should enable xtrace somewhere (unless bookkeeper disables it; in your tests it should not) - assert 'set -x' in text, "with_gaurds=True should enable command echo" + assert 'set -x' in text, ( + 'with_gaurds=True should enable command echo' + ) # Return code capture should be hidden inside brace trick - assert '{ RETURN_CODE=$?' in text, "with_gaurds=True should capture RETURN_CODE in brace trick" - assert 'set +x -e' in text, "with_gaurds=True should disable echo and re-enable -e" + assert '{ RETURN_CODE=$?' in text, ( + 'with_gaurds=True should capture RETURN_CODE in brace trick' + ) + assert 'set +x -e' in text, ( + 'with_gaurds=True should disable echo and re-enable -e' + ) # Ensure we don't have a noisy RETURN_CODE=$? 
line outside the brace trick - bad_lines = [ln for ln in text.splitlines() if ln.strip().startswith('RETURN_CODE=$?')] + bad_lines = [ + ln + for ln in text.splitlines() + if ln.strip().startswith('RETURN_CODE=$?') + ] assert not bad_lines, ( - f"RETURN_CODE capture should be in brace trick, found: {bad_lines}") + f'RETURN_CODE capture should be in brace trick, found: {bad_lines}' + ) else: # If guards are off, we should not see xtrace toggles or the brace trick capture - assert 'set -x' not in text, "with_gaurds=False should not enable xtrace" - assert 'set +x -e' not in text, "with_gaurds=False should not include brace trick toggles" - assert '{ RETURN_CODE=$?' not in text, "with_gaurds=False should not include brace trick capture" + assert 'set -x' not in text, ( + 'with_gaurds=False should not enable xtrace' + ) + assert 'set +x -e' not in text, ( + 'with_gaurds=False should not include brace trick toggles' + ) + assert '{ RETURN_CODE=$?' not in text, ( + 'with_gaurds=False should not include brace trick capture' + ) # --- Status behavior if variant['with_status']: - assert 'Mark job as running' in text, "with_status=True should mark job as running" - assert 'Mark job as stopped' in text, "with_status=True should mark job as stopped" - assert 'printf "pass" >' in text, "with_status=True should write pass marker" - assert 'printf "fail" >' in text, "with_status=True should write fail marker" - assert 'stat' in text or 'status' in text, "with_status=True should dump status JSON" + assert 'Mark job as running' in text, ( + 'with_status=True should mark job as running' + ) + assert 'Mark job as stopped' in text, ( + 'with_status=True should mark job as stopped' + ) + assert 'printf "pass" >' in text, ( + 'with_status=True should write pass marker' + ) + assert 'printf "fail" >' in text, ( + 'with_status=True should write fail marker' + ) + assert 'stat' in text or 'status' in text, ( + 'with_status=True should dump status JSON' + ) # Make sure RETURN_CODE is 
referenced in final status conditional assert '"$RETURN_CODE"' in text or 'RETURN_CODE' in text, ( - "with_status=True should reference RETURN_CODE") + 'with_status=True should reference RETURN_CODE' + ) else: # When status is off, we should not emit pass/fail markers - assert 'printf "pass" >' not in text, "with_status=False should not write pass marker" - assert 'printf "fail" >' not in text, "with_status=False should not write fail marker" + assert 'printf "pass" >' not in text, ( + 'with_status=False should not write pass marker' + ) + assert 'printf "fail" >' not in text, ( + 'with_status=False should not write fail marker' + ) assert 'Mark job as running' not in text assert 'Mark job as stopped' not in text # --- Dependency guard behavior is only emitted when status is on and depends exist if variant['depends'] and variant['with_status']: - assert 'if [ -f' in text, "depends+with_status should emit dependency condition" - assert 'RETURN_CODE=126' in text, "depends+with_status should set skip RETURN_CODE=126" + assert 'if [ -f' in text, ( + 'depends+with_status should emit dependency condition' + ) + assert 'RETURN_CODE=126' in text, ( + 'depends+with_status should set skip RETURN_CODE=126' + ) else: # Be careful: user command might contain this string, but in these tests it won't. 
assert 'RETURN_CODE=126' not in text, ( - "no depends or no status: should not insert skip RETURN_CODE") + 'no depends or no status: should not insert skip RETURN_CODE' + ) # --- CWD behavior if variant['cwd']: - assert 'pushd' in text, "cwd=True should use pushd" - assert 'popd' in text, "cwd=True should include popd" + assert 'pushd' in text, 'cwd=True should use pushd' + assert 'popd' in text, 'cwd=True should include popd' else: - assert 'pushd' not in text, "cwd=False should not include pushd" - assert 'popd' not in text, "cwd=False should not include popd" + assert 'pushd' not in text, 'cwd=False should not include pushd' + assert 'popd' not in text, 'cwd=False should not include popd' # --- If we emit internal conditional checks (preamble/cwd), they must be closed properly if variant['cwd'] or variant['preamble']: # If you use a recognizable comment/tag, assert it exists if 'internal condition check' in text: - assert 'fi # internal condition check' in text, "internal if must be closed" + assert 'fi # internal condition check' in text, ( + 'internal if must be closed' + ) else: # Generic safety: at least ensure the count of 'if [[ ' and 'fi' isn't wildly off # (This is loose on purpose to avoid false positives with outer dependency if.) 
assert text.count('if [[ ') <= text.count('fi'), ( - "seems like an internal if may be missing a fi") + 'seems like an internal if may be missing a fi' + ) # --- Optional: ordering sanity when guards+status on # Ensure xtrace starts after "Mark job as running" and stops before "Mark job as stopped" @@ -223,28 +289,28 @@ def test_bash_job_variants_syntax_grided(): stopped_idx = text.find('Mark job as stopped') if running_idx != -1 and x_idx != -1 and stopped_idx != -1: assert running_idx < x_idx < stopped_idx, ( - "xtrace should not include boilerplate status dump; it should wrap the payload") + 'xtrace should not include boilerplate status dump; it should wrap the payload' + ) n_checks = len(grid_variants) print(f'Ran all n_checks={n_checks}') def test_bashjob_exec_preamble_fail(): - with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() # Command would create a file if it ran — use that to detect it was skipped - outfile = tmp_path / "ran.txt" + outfile = tmp_path / 'ran.txt' job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir)) job.preamble = ['false'] # fail-fast preamble job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -258,7 +324,7 @@ def test_bashjob_exec_preamble_fail(): ) assert job.fail_fpath.exists() - assert not outfile.exists(), "command should not run if preamble fails" + assert not outfile.exists(), 'command should not run if preamble fails' status = kwutil.Json.load(job.stat_fpath) assert status['ret'] != 0 @@ -267,26 +333,31 @@ def test_bashjob_exec_preamble_fail(): def 
test_bashjob_exec_depends_met_runs(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() dep = BashJob('echo dep', name='dep_job') - dep.pass_fpath = tmp_path / "dep_job.pass" - dep.fail_fpath = tmp_path / "dep_job.fail" - dep.stat_fpath = tmp_path / "dep_job.status.json" + dep.pass_fpath = tmp_path / 'dep_job.pass' + dep.fail_fpath = tmp_path / 'dep_job.fail' + dep.stat_fpath = tmp_path / 'dep_job.status.json' # Create dependency pass marker dep.pass_fpath.parent.mkdir(parents=True, exist_ok=True) dep.pass_fpath.write_text('pass') - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir), depends=[dep]) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', + name='job2', + cwd=str(workdir), + depends=[dep], + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -300,8 +371,8 @@ def test_bashjob_exec_depends_met_runs(): check=False, ) - assert outfile.exists(), "command should run if dependency is met" - assert job.pass_fpath.exists(), "job should pass" + assert outfile.exists(), 'command should run if dependency is met' + assert job.pass_fpath.exists(), 'job should pass' assert not job.fail_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -311,25 +382,30 @@ def test_bashjob_exec_depends_met_runs(): def test_bashjob_exec_depends_unmet_skips(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' 
workdir.mkdir() dep = BashJob('echo dep', name='dep_job') - dep.pass_fpath = tmp_path / "dep_job.pass" - dep.fail_fpath = tmp_path / "dep_job.fail" - dep.stat_fpath = tmp_path / "dep_job.status.json" + dep.pass_fpath = tmp_path / 'dep_job.pass' + dep.fail_fpath = tmp_path / 'dep_job.fail' + dep.stat_fpath = tmp_path / 'dep_job.status.json' # Do NOT create dep.pass_fpath => dependency unmet - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir), depends=[dep]) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', + name='job2', + cwd=str(workdir), + depends=[dep], + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" - job.skip_fpath = tmp_path / "job2.skip" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' + job.skip_fpath = tmp_path / 'job2.skip' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -343,11 +419,15 @@ def test_bashjob_exec_depends_unmet_skips(): check=False, ) - assert not outfile.exists(), "command should not run if dependency is unmet" + assert not outfile.exists(), ( + 'command should not run if dependency is unmet' + ) # Skipped jobs (deps unmet, RC=126) write skip_fpath only — they # are NOT also marked as failed. 
- assert job.skip_fpath.exists(), "skipped job should be marked as skip" - assert not job.fail_fpath.exists(), "skipped job should not be marked as fail" + assert job.skip_fpath.exists(), 'skipped job should be marked as skip' + assert not job.fail_fpath.exists(), ( + 'skipped job should not be marked as fail' + ) assert not job.pass_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -358,17 +438,19 @@ def test_bashjob_exec_cwd_missing_skips_command(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - missing_dir = tmp_path / "does_not_exist" + missing_dir = tmp_path / 'does_not_exist' assert not missing_dir.exists() - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(missing_dir)) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', name='job2', cwd=str(missing_dir) + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -382,8 +464,8 @@ def test_bashjob_exec_cwd_missing_skips_command(): check=False, ) - assert job.fail_fpath.exists(), "missing cwd should mark job as failed" - assert not outfile.exists(), "command should not run if cwd pushd fails" + assert job.fail_fpath.exists(), 'missing cwd should mark job as failed' + assert not outfile.exists(), 'command should not run if cwd pushd fails' assert not job.pass_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -393,17 +475,17 @@ def test_bashjob_exec_cwd_missing_skips_command(): def test_bashjob_exec_happy_path(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = 
tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() - outfile = tmp_path / "ran.txt" + outfile = tmp_path / 'ran.txt' job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir)) job.preamble = ['export SETUP_LINE1=1', 'export SETUP_LINE2=2'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -417,9 +499,9 @@ def test_bashjob_exec_happy_path(): check=False, ) - assert outfile.exists(), "command should run on happy path" - assert job.pass_fpath.exists(), "pass marker should exist" - assert not job.fail_fpath.exists(), "fail marker should not exist" + assert outfile.exists(), 'command should run on happy path' + assert job.pass_fpath.exists(), 'pass marker should exist' + assert not job.fail_fpath.exists(), 'fail marker should not exist' status = kwutil.Json.load(job.stat_fpath) assert status['ret'] == 0 diff --git a/tests/test_cli.py b/tests/test_cli.py index e18d091..8c3389b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,14 +1,14 @@ - def test_cli(): """ Ensure the CLI works as expected """ import ubelt as ub + dpath = ub.Path.appdir('cmd_queue/tests/tests_cli').ensuredir() bash_text = ub.codeblock( - r''' + r""" cmd_queue new testqueue1 cmd_queue submit --jobname "job1" -- testqueue1 \ @@ -20,7 +20,8 @@ def test_cli(): cmd_queue show testqueue1 cmd_queue run testqueue1 --backend=serial - ''') + """ + ) fpath = dpath / 'test_script.sh' fpath.write_text(bash_text) @@ -35,12 +36,13 @@ def test_cli_single_executable(): """ import ubelt as ub + dpath = ub.Path.appdir('cmd_queue/tests/tests_cli').ensuredir() true_exe = ub.find_exe('true') bash_text = ub.codeblock( - fr''' + rf""" 
cmd_queue new testqueue2 cmd_queue submit --jobname "job1" -- testqueue2 \ @@ -52,7 +54,8 @@ def test_cli_single_executable(): cmd_queue show testqueue2 cmd_queue run testqueue2 --backend=serial - ''') + """ + ) fpath = dpath / 'test_script.sh' fpath.write_text(bash_text) diff --git a/tests/test_errors.py b/tests/test_errors.py index 4b9316f..cb2d1c3 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,7 +1,7 @@ - def test_failures_on_each_backend(): # Test case where a job fails import cmd_queue + backends = cmd_queue.Queue.available_backends() for backend in backends: self = cmd_queue.Queue.create(backend=backend) diff --git a/tests/test_import.py b/tests/test_import.py index 086338f..46d88b6 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,3 +1,4 @@ def test_import(): import cmd_queue + print(f'cmd_queue={cmd_queue}') diff --git a/tests/test_slurm_variants.py b/tests/test_slurm_variants.py index 85852ef..c8ec462 100644 --- a/tests/test_slurm_variants.py +++ b/tests/test_slurm_variants.py @@ -9,12 +9,14 @@ def _extract_wrap_payload(sbatch_args): extract the string passed to --wrap and unquote it. 
""" # sbatch_args contains an entry like: '--wrap \'\'' - wrap_items = [item for item in sbatch_args if item.startswith("--wrap ")] - assert len(wrap_items) == 1, f"Expected exactly one --wrap item, got: {wrap_items}" + wrap_items = [item for item in sbatch_args if item.startswith('--wrap ')] + assert len(wrap_items) == 1, ( + f'Expected exactly one --wrap item, got: {wrap_items}' + ) wrap_item = wrap_items[0] # split once: "--wrap " - _, quoted_payload = wrap_item.split(" ", 1) + _, quoted_payload = wrap_item.split(' ', 1) # The payload is shlex.quote(...)'d in the implementation payload = shlex.split(quoted_payload)[0] @@ -23,12 +25,12 @@ def _extract_wrap_payload(sbatch_args): def test_slurm_wrap_contains_global_then_job_preamble_then_command(): # Global preamble: specified on the queue - global_preamble = ["echo GLOBAL1", "echo GLOBAL2"] + global_preamble = ['echo GLOBAL1', 'echo GLOBAL2'] # Job preamble: specified per submit - job_preamble = "echo JOB1" + job_preamble = 'echo JOB1' - command = "echo RUN" + command = 'echo RUN' queue = SlurmQueue(preamble=global_preamble) @@ -42,31 +44,31 @@ def test_slurm_wrap_contains_global_then_job_preamble_then_command(): payload = _extract_wrap_payload(sbatch_args) # The payload should be a single shell line with && joining - expected = " && ".join(global_preamble + [job_preamble, command]) + expected = ' && '.join(global_preamble + [job_preamble, command]) # Exact match is reasonable here because payload construction is deterministic - assert payload == expected, f"\nExpected:\n{expected}\nGot:\n{payload}" + assert payload == expected, f'\nExpected:\n{expected}\nGot:\n{payload}' def test_slurm_wrap_omits_missing_preambles(): # No global preamble, no job preamble queue = SlurmQueue(preamble=None) - job = queue.submit("echo ONLYCMD", preamble=None) + job = queue.submit('echo ONLYCMD', preamble=None) sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - 
assert payload == "echo ONLYCMD" + assert payload == 'echo ONLYCMD' # Global preamble only - queue = SlurmQueue(preamble=["echo GLOBAL"]) - job = queue.submit("echo CMD", preamble=None) + queue = SlurmQueue(preamble=['echo GLOBAL']) + job = queue.submit('echo CMD', preamble=None) sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - assert payload == "echo GLOBAL && echo CMD" + assert payload == 'echo GLOBAL && echo CMD' # Job preamble only queue = SlurmQueue(preamble=None) - job = queue.submit("echo CMD", preamble="echo JOB") + job = queue.submit('echo CMD', preamble='echo JOB') sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - assert payload == "echo JOB && echo CMD" + assert payload == 'echo JOB && echo CMD' diff --git a/tests/tests_mixed_hardware_tmux.py b/tests/tests_mixed_hardware_tmux.py index e942d50..6b92552 100644 --- a/tests/tests_mixed_hardware_tmux.py +++ b/tests/tests_mixed_hardware_tmux.py @@ -7,6 +7,7 @@ def test_mixed_hardware(): import cmd_queue import ubelt as ub + backend = 'tmux' gres = [0, 1] @@ -14,24 +15,61 @@ def test_mixed_hardware(): dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_mixed_hardware') environ = {} - queue = cmd_queue.Queue.create(backend, name='test_mixed_hardware', - size=2, environ=environ, - dpath=dpath, gres=gres) + queue = cmd_queue.Queue.create( + backend, + name='test_mixed_hardware', + size=2, + environ=environ, + dpath=dpath, + gres=gres, + ) import itertools as it + counter = it.count(0) def submit_tree(queue, need_pred_pxl=True): index = next(counter) if need_pred_pxl: - pred_pxl_job = queue.submit('echo "pred_pxl: $CUDA_VISIBLE_DEVICES"', name=f'pred_pxl_{index}', depends=None, cpus=5, gpus=1) + pred_pxl_job = queue.submit( + 'echo "pred_pxl: $CUDA_VISIBLE_DEVICES"', + name=f'pred_pxl_{index}', + depends=None, + cpus=5, + gpus=1, + ) else: pred_pxl_job = None - 
queue.submit('echo "eval_pxl: $CUDA_VISIBLE_DEVICES"', name=f'eval_pxl_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "pred_trk: $CUDA_VISIBLE_DEVICES"', name=f'pred_trk_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "eval_trk: $CUDA_VISIBLE_DEVICES"', name=f'eval_trk_{index}', depends=f'pred_trk_{index}', cpus=2) - queue.submit('echo "pred_act: $CUDA_VISIBLE_DEVICES"', name=f'pred_act_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "eval_act: $CUDA_VISIBLE_DEVICES"', name=f'eval_act_{index}', depends=f'pred_act_{index}', cpus=2) + queue.submit( + 'echo "eval_pxl: $CUDA_VISIBLE_DEVICES"', + name=f'eval_pxl_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "pred_trk: $CUDA_VISIBLE_DEVICES"', + name=f'pred_trk_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "eval_trk: $CUDA_VISIBLE_DEVICES"', + name=f'eval_trk_{index}', + depends=f'pred_trk_{index}', + cpus=2, + ) + queue.submit( + 'echo "pred_act: $CUDA_VISIBLE_DEVICES"', + name=f'pred_act_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "eval_act: $CUDA_VISIBLE_DEVICES"', + name=f'eval_act_{index}', + depends=f'pred_act_{index}', + cpus=2, + ) submit_tree(queue) submit_tree(queue) From 5355ca06e58f28eb3431d73c5d08b21fb6c0b104 Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 16:58:00 -0400 Subject: [PATCH 24/27] Ruff check fix --- cmd_queue/__init__.py | 1 - cmd_queue/airflow_queue.py | 25 +++++++++++++++---------- cmd_queue/base_queue.py | 16 +++++++++------- cmd_queue/cli_boilerplate.py | 4 ++-- cmd_queue/main.py | 6 ++++-- cmd_queue/monitor_app.py | 8 ++++---- cmd_queue/monitor_manifest.py | 2 +- cmd_queue/serial_queue.py | 6 +++--- cmd_queue/slurm_queue.py | 9 ++++----- cmd_queue/tmux_queue.py | 11 ++++++----- cmd_queue/util/textual_extensions.py | 18 +++++++++--------- cmd_queue/util/util_algo.py | 2 +- cmd_queue/util/util_networkx.py | 2 +- cmd_queue/util/util_yaml.py | 2 +- 
dev/_devcheck_rich.py | 7 ++++--- docs/source/conf.py | 5 ++--- examples/slurm_example.py | 3 ++- examples/tmux_example.py | 2 +- run_tests.py | 3 ++- setup.py | 8 ++++---- tests/test_airflow_queue.py | 3 +-- tests/test_bash_job_errors.py | 8 +++++--- tests/test_bash_variants.py | 6 ++++-- tests/test_slurm_variants.py | 3 ++- tests/tests_mixed_hardware_tmux.py | 3 ++- 25 files changed, 89 insertions(+), 74 deletions(-) diff --git a/cmd_queue/__init__.py b/cmd_queue/__init__.py index 30267dd..5dc31d6 100644 --- a/cmd_queue/__init__.py +++ b/cmd_queue/__init__.py @@ -313,7 +313,6 @@ 'base_queue': ['Queue'], } from cmd_queue import base_queue - from cmd_queue.base_queue import ( Queue, ) diff --git a/cmd_queue/airflow_queue.py b/cmd_queue/airflow_queue.py index 7862d33..9150260 100644 --- a/cmd_queue/airflow_queue.py +++ b/cmd_queue/airflow_queue.py @@ -1,4 +1,5 @@ from __future__ import annotations + # mypy: ignore-errors r"""Airflow backend. @@ -228,14 +229,15 @@ def run(self, block: bool = True, system: bool = False) -> None: 'Non-blocking airflow runs are not implemented yet' ) with self._patched_env(env): - from airflow.utils import db + import contextlib + import sys + + from airflow.models.dag import DagModel from airflow.models.dagbag import DagBag - from airflow.models.serialized_dag import DagVersion from airflow.models.dagbundle import DagBundleModel - from airflow.models.dag import DagModel + from airflow.models.serialized_dag import DagVersion + from airflow.utils import db from airflow.utils.session import create_session - import sys - import contextlib if hasattr(db, 'resetdb'): db.resetdb() @@ -295,9 +297,9 @@ def read_state(self): """ env = self._airflow_env() with self._patched_env(env): - from airflow.utils.session import create_session from airflow.models.dagrun import DagRun from airflow.models.taskinstance import TaskInstance + from airflow.utils.session import create_session from sqlalchemy import select try: @@ -307,7 +309,9 @@ def 
read_state(self): failed_state = TaskInstanceState.FAILED skipped_state = TaskInstanceState.SKIPPED except Exception: # pragma: no cover - from airflow.utils.state import State as TaskInstanceState # type: ignore + from airflow.utils.state import ( + State as TaskInstanceState, # type: ignore + ) success_state = TaskInstanceState.SUCCESS failed_state = TaskInstanceState.FAILED @@ -464,9 +468,9 @@ def print_commands( code = self.finalize_text() if style == 'rich': + from rich.console import Console from rich.panel import Panel from rich.syntax import Syntax - from rich.console import Console console = Console() console.print(Panel(Syntax(code, 'python'), title=str(self.fpath))) @@ -494,9 +498,10 @@ def demo() -> None: from cmd_queue.airflow_queue import * # NOQA demo() """ - from airflow import DAG - from datetime import timezone from datetime import datetime as datetime_cls + from datetime import timezone + + from airflow import DAG from airflow.operators.bash import BashOperator now = datetime_cls.now(timezone.utc) diff --git a/cmd_queue/base_queue.py b/cmd_queue/base_queue.py index f67757e..f2bad14 100644 --- a/cmd_queue/base_queue.py +++ b/cmd_queue/base_queue.py @@ -1,6 +1,6 @@ from __future__ import annotations -# mypy: ignore-errors +# mypy: ignore-errors from typing import Any, Dict, Iterable, List, Optional, Union import ubelt as ub @@ -236,10 +236,12 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: @classmethod def _backend_classes(cls): - from cmd_queue import tmux_queue - from cmd_queue import serial_queue - from cmd_queue import slurm_queue - from cmd_queue import airflow_queue + from cmd_queue import ( + airflow_queue, + serial_queue, + slurm_queue, + tmux_queue, + ) lut = { 'serial': serial_queue.SerialQueue, @@ -401,9 +403,9 @@ def print_commands( exclude_tags=exclude_tags, ) if style == 'rich': - from rich.syntax import Syntax - from rich.panel import Panel from rich.console import Console + from rich.panel import Panel + from 
rich.syntax import Syntax console = Console() console.print(Panel(Syntax(code, 'bash'), title=str(self.fpath))) diff --git a/cmd_queue/cli_boilerplate.py b/cmd_queue/cli_boilerplate.py index 7c27667..9215d54 100644 --- a/cmd_queue/cli_boilerplate.py +++ b/cmd_queue/cli_boilerplate.py @@ -1,4 +1,5 @@ from __future__ import annotations + # mypy: ignore-errors """ @@ -98,8 +99,8 @@ >>> print('----------------') >>> my_cli_main(cmdline=0, run=1, print_queue=0, print_commands=0) """ -from typing import Any, Dict, Optional import typing +from typing import Any, Dict, Optional import scriptconfig as scfg import ubelt as ub @@ -295,7 +296,6 @@ def run_queue( queue (cmd_queue.Queue): queue to run / report print_kwargs (None | Dict): """ - import cmd_queue queue: cmd_queue.Queue print_thresh = 30 diff --git a/cmd_queue/main.py b/cmd_queue/main.py index b4c2672..c2c6ece 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # PYTHON_ARGCOMPLETE_OK from __future__ import annotations + # mypy: ignore-errors """ @@ -13,7 +14,7 @@ cmd_queue --help """ -from typing import Any, Callable, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable import rich import scriptconfig as scfg @@ -119,9 +120,10 @@ class CommonShowRun(CommonConfig): ) def _build_queue(config) -> 'cmd_queue.Queue': - import cmd_queue import json + import cmd_queue + queue = cmd_queue.Queue.create( size=max(1, config['workers']), backend=config['backend'], diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index a7df0c0..b0a3ad0 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -1,19 +1,19 @@ from __future__ import annotations + from types import ModuleType from typing import Any, Callable, Optional, Tuple try: from textual import events - from textual.widgets import ScrollView - from textual.widget import Widget from textual.views import DockView - from cmd_queue.util.textual_extensions import ExtHeader - from 
cmd_queue.util.textual_extensions import InstanceRunnableApp + from textual.widget import Widget + from textual.widgets import ScrollView # from rich.panel import Panel # from rich.text import Text from cmd_queue.util import richer as rich from cmd_queue.util import texter as textual + from cmd_queue.util.textual_extensions import ExtHeader, InstanceRunnableApp # import ubelt as ub except ImportError: rich: ModuleType = None # type: ignore diff --git a/cmd_queue/monitor_manifest.py b/cmd_queue/monitor_manifest.py index 455790c..0acc72a 100644 --- a/cmd_queue/monitor_manifest.py +++ b/cmd_queue/monitor_manifest.py @@ -1,4 +1,5 @@ from __future__ import annotations + # mypy: ignore-errors """ @@ -24,7 +25,6 @@ import ubelt as ub - SCHEMA_VERSION = 1 diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index 25d4f16..d885312 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -1,4 +1,5 @@ from __future__ import annotations + # mypy: ignore-errors """ @@ -12,8 +13,7 @@ import ubelt as ub from cmd_queue import base_queue -from cmd_queue.util import util_bash -from cmd_queue.util import util_tags +from cmd_queue.util import util_bash, util_tags class BashJob(base_queue.Job): @@ -411,8 +411,8 @@ def print_commands( with_status=with_status, with_gaurds=with_gaurds, **kwargs ) if style == 'rich': - from rich.syntax import Syntax from rich.console import Console + from rich.syntax import Syntax console = Console() console.print(Syntax(code, 'bash')) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 76e0621..f0ff586 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -46,7 +46,6 @@ from cmd_queue import base_queue # NOQA from cmd_queue.util import util_tags - try: from functools import cache # Python 3.9+ only except ImportError: @@ -475,8 +474,8 @@ def __init__( **kwargs: Any, ) -> None: super().__init__() - import uuid import time + import uuid self.jobs = [] if name is None: @@ -782,7 +781,6 @@ 
def run( 'falling back to inline monitor.' ) return self.monitor(onfail=onfail, onexit=onexit) - from cmd_queue.tmux_queue import has_stdin from cmd_queue.util.util_tmux import tmux as _tmux extra_args = [] @@ -871,11 +869,12 @@ def monitor( >>> queue.run() """ + import io import time + + import pandas as pd from rich.live import Live from rich.table import Table - import io - import pandas as pd jobid_history = set() diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index b6b4a1d..205ea13 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -1,4 +1,5 @@ from __future__ import annotations + # mypy: ignore-errors """ @@ -55,10 +56,9 @@ from typing import Any, Dict, Iterable, List, Optional import ubelt as ub -# import itertools as it -from cmd_queue import base_queue -from cmd_queue import serial_queue +# import itertools as it +from cmd_queue import base_queue, serial_queue from cmd_queue.util.util_tmux import tmux @@ -510,8 +510,8 @@ def order_jobs(self) -> None: rankings[rank].update(members) if 0: - from graphid.util import util_graphviz import kwplot + from graphid.util import util_graphviz kwplot.autompl() util_graphviz.show_nx(graph, fnum=1) @@ -1121,8 +1121,8 @@ def _build_failed_jobs_renderable(self) -> Any: failed, skipped, status_by_name = self._collect_failed_and_skipped() if not failed and not skipped: return None - from rich.table import Table from rich.console import Group + from rich.table import Table from rich.text import Text renderables = [] @@ -1187,6 +1187,7 @@ def _build_live_renderable(self): def _simple_rich_monitor(self, refresh_rate=0.4): import time + from rich.live import Live if 0: diff --git a/cmd_queue/util/textual_extensions.py b/cmd_queue/util/textual_extensions.py index 2ec20b3..7085b5c 100644 --- a/cmd_queue/util/textual_extensions.py +++ b/cmd_queue/util/textual_extensions.py @@ -1,28 +1,28 @@ from __future__ import annotations -# mypy: ignore-errors +# mypy: ignore-errors from typing import Any # 
from typing import Any try: - from textual.app import App - # from textual.driver import Driver # from typing import Type # from rich.console import Console import asyncio - - # from textual import events - from textual.widget import Widget - from textual.reactive import watch, Reactive from datetime import datetime + + from rich.console import RenderableType from rich.panel import Panel + from rich.repr import Result from rich.style import StyleType from rich.table import Table - from rich.console import RenderableType - from rich.repr import Result + from textual.app import App + from textual.reactive import Reactive, watch + + # from textual import events + from textual.widget import Widget except ImportError: App: type = object Widget: type = object diff --git a/cmd_queue/util/util_algo.py b/cmd_queue/util/util_algo.py index d5bdf6b..b32ff20 100644 --- a/cmd_queue/util/util_algo.py +++ b/cmd_queue/util/util_algo.py @@ -1,7 +1,7 @@ from __future__ import annotations -from typing import List from collections.abc import Sequence +from typing import List import numpy as np diff --git a/cmd_queue/util/util_networkx.py b/cmd_queue/util/util_networkx.py index 747cc52..f2cb315 100644 --- a/cmd_queue/util/util_networkx.py +++ b/cmd_queue/util/util_networkx.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Iterable, Hashable +from typing import Any, Hashable, Iterable def is_topological_order(graph: Any, node_order: Iterable[Hashable]) -> bool: diff --git a/cmd_queue/util/util_yaml.py b/cmd_queue/util/util_yaml.py index 237151d..c247912 100644 --- a/cmd_queue/util/util_yaml.py +++ b/cmd_queue/util/util_yaml.py @@ -1,7 +1,7 @@ import io import os -import ubelt as ub +import ubelt as ub NEW_RUAMEL = 1 diff --git a/dev/_devcheck_rich.py b/dev/_devcheck_rich.py index 9694c09..3b38891 100644 --- a/dev/_devcheck_rich.py +++ b/dev/_devcheck_rich.py @@ -6,10 +6,11 @@ https://github.com/Textualize/rich/issues/2120 """ -from rich.table import Table 
-from rich.live import Live import time +from rich.live import Live +from rich.table import Table + def random_rich_table(): import random @@ -54,8 +55,8 @@ def simple_pager_no_update(): def combined_scrolling_table(): from textual import events from textual.app import App - from textual.widgets import ScrollView from textual.widget import Widget + from textual.widgets import ScrollView class JobTable(Widget): def on_mount(self): diff --git a/docs/source/conf.py b/docs/source/conf.py index f190416..4ce3bea 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -111,10 +111,9 @@ # sys.path.insert(0, os.path.abspath('.')) # -- Project information ----------------------------------------------------- +from os.path import dirname, exists, join + import sphinx_rtd_theme -from os.path import exists -from os.path import dirname -from os.path import join def parse_version(fpath): diff --git a/examples/slurm_example.py b/examples/slurm_example.py index 1d0edf4..0d34cb2 100644 --- a/examples/slurm_example.py +++ b/examples/slurm_example.py @@ -1,7 +1,8 @@ def main(): - import cmd_queue import ubelt as ub + import cmd_queue + queue = cmd_queue.Queue.create( backend='slurm', partition='project123', account='user123', ntasks=1 ) diff --git a/examples/tmux_example.py b/examples/tmux_example.py index f87e6b4..e2c4d67 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -44,8 +44,8 @@ python ~/code/cmd_queue/examples/tmux_example.py --failures=0 """ -import ubelt as ub import scriptconfig as scfg +import ubelt as ub class TmuxExampleConfig(scfg.DataConfig): diff --git a/run_tests.py b/run_tests.py index 6f6fa46..9b876c4 100755 --- a/run_tests.py +++ b/run_tests.py @@ -1,8 +1,9 @@ #!/usr/bin/env python if __name__ == '__main__': - import pytest import sys + import pytest + package_name = 'cmd_queue' mod_dpath = 'cmd_queue' test_dpath = 'tests' diff --git a/setup.py b/setup.py index a71284d..bdbc260 100755 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ 
#!/usr/bin/env python # Generated by ~/code/xcookie/xcookie/builders/setup.py # based on part ~/code/xcookie/xcookie/rc/setup.py.in -import sys import re -from os.path import exists, dirname, join -from setuptools import find_packages -from setuptools import setup +import sys +from os.path import dirname, exists, join + +from setuptools import find_packages, setup def parse_version(fpath): diff --git a/tests/test_airflow_queue.py b/tests/test_airflow_queue.py index cd31160..5f7e854 100644 --- a/tests/test_airflow_queue.py +++ b/tests/test_airflow_queue.py @@ -1,11 +1,10 @@ """Tests for the Airflow backend without pytest fixtures.""" -import ubelt as ub import pytest +import ubelt as ub from cmd_queue.airflow_queue import AirflowQueue - airflow = pytest.importorskip('airflow') diff --git a/tests/test_bash_job_errors.py b/tests/test_bash_job_errors.py index ab3d747..6051048 100644 --- a/tests/test_bash_job_errors.py +++ b/tests/test_bash_job_errors.py @@ -42,11 +42,11 @@ def test_bash_job_errors(): dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_bash_job_errors') dpath.delete().ensuredir() - from cmd_queue.serial_queue import BashJob - # Demo full boilerplate for a job with no dependencies import sys + from cmd_queue.serial_queue import BashJob + sys.executable script_fpath = demo_script(dpath) @@ -65,8 +65,10 @@ def test_bash_job_errors(): def test_tmux_queue_errors(): - import ubelt as ub import sys + + import ubelt as ub + import cmd_queue dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_tmux_queue_errors') diff --git a/tests/test_bash_variants.py b/tests/test_bash_variants.py index 8eabc3a..09563aa 100644 --- a/tests/test_bash_variants.py +++ b/tests/test_bash_variants.py @@ -2,11 +2,13 @@ Tests for multiple variants of bash job text construction. 
""" -from cmd_queue.serial_queue import BashJob import subprocess import tempfile -import ubelt as ub + import kwutil +import ubelt as ub + +from cmd_queue.serial_queue import BashJob def test_primary_bash_job_text_variants(): diff --git a/tests/test_slurm_variants.py b/tests/test_slurm_variants.py index c8ec462..2baf9b8 100644 --- a/tests/test_slurm_variants.py +++ b/tests/test_slurm_variants.py @@ -1,6 +1,7 @@ # test_slurm_preamble_insertion.py import shlex -from cmd_queue.slurm_queue import SlurmQueue, SlurmJob + +from cmd_queue.slurm_queue import SlurmJob, SlurmQueue def _extract_wrap_payload(sbatch_args): diff --git a/tests/tests_mixed_hardware_tmux.py b/tests/tests_mixed_hardware_tmux.py index 6b92552..d79c5d9 100644 --- a/tests/tests_mixed_hardware_tmux.py +++ b/tests/tests_mixed_hardware_tmux.py @@ -5,9 +5,10 @@ def test_mixed_hardware(): tree_jobs. """ - import cmd_queue import ubelt as ub + import cmd_queue + backend = 'tmux' gres = [0, 1] From 725f3f6eb0c1c8436be8b86781b6c075214a89ac Mon Sep 17 00:00:00 2001 From: joncrall Date: Thu, 30 Apr 2026 17:03:08 -0400 Subject: [PATCH 25/27] Tweaks --- cmd_queue/airflow_queue.py | 5 +---- cmd_queue/base_queue.py | 2 -- cmd_queue/cli_boilerplate.py | 5 +---- cmd_queue/main.py | 5 +---- cmd_queue/monitor_manifest.py | 5 +---- cmd_queue/serial_queue.py | 5 +---- cmd_queue/slurm_queue.py | 3 +-- cmd_queue/slurmify.py | 3 +-- cmd_queue/tmux_queue.py | 5 +---- cmd_queue/util/textual_extensions.py | 4 ---- 10 files changed, 8 insertions(+), 34 deletions(-) diff --git a/cmd_queue/airflow_queue.py b/cmd_queue/airflow_queue.py index 9150260..adff444 100644 --- a/cmd_queue/airflow_queue.py +++ b/cmd_queue/airflow_queue.py @@ -1,7 +1,3 @@ -from __future__ import annotations - -# mypy: ignore-errors - r"""Airflow backend. 
Note: @@ -29,6 +25,7 @@ >>> print((queue.dags_dpath / 'cmdq_airflow_mwe.py').exists()) True """ +from __future__ import annotations import contextlib import os import time diff --git a/cmd_queue/base_queue.py b/cmd_queue/base_queue.py index f2bad14..b5dd237 100644 --- a/cmd_queue/base_queue.py +++ b/cmd_queue/base_queue.py @@ -1,6 +1,4 @@ from __future__ import annotations - -# mypy: ignore-errors from typing import Any, Dict, Iterable, List, Optional, Union import ubelt as ub diff --git a/cmd_queue/cli_boilerplate.py b/cmd_queue/cli_boilerplate.py index 9215d54..5c9aca9 100644 --- a/cmd_queue/cli_boilerplate.py +++ b/cmd_queue/cli_boilerplate.py @@ -1,7 +1,3 @@ -from __future__ import annotations - -# mypy: ignore-errors - """ This file defines a helper scriptconfig base config that can be used to help make cmd_queue CLIs so cmd_queue options are standardized and present at the @@ -99,6 +95,7 @@ >>> print('----------------') >>> my_cli_main(cmdline=0, run=1, print_queue=0, print_commands=0) """ +from __future__ import annotations import typing from typing import Any, Dict, Optional diff --git a/cmd_queue/main.py b/cmd_queue/main.py index c2c6ece..2254469 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -1,9 +1,5 @@ #!/usr/bin/env python3 # PYTHON_ARGCOMPLETE_OK -from __future__ import annotations - -# mypy: ignore-errors - """ This is the main script for the cmd_queue CLI. The :class:`CmdQueueConfig` defines the available options and its docstring provides a quick tutorial. 
@@ -14,6 +10,7 @@ cmd_queue --help """ +from __future__ import annotations from typing import TYPE_CHECKING, Any, Callable import rich diff --git a/cmd_queue/monitor_manifest.py b/cmd_queue/monitor_manifest.py index 0acc72a..e3c1440 100644 --- a/cmd_queue/monitor_manifest.py +++ b/cmd_queue/monitor_manifest.py @@ -1,7 +1,3 @@ -from __future__ import annotations - -# mypy: ignore-errors - """ Persistent metadata describing a queue at run-time so that a monitor process can reattach to it without holding a live queue object. @@ -20,6 +16,7 @@ a human queue name to the most recent manifest path so that ``cmd_queue monitor `` can find it. """ +from __future__ import annotations import json from typing import Any, Dict, Optional diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index d885312..d47426a 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -1,12 +1,9 @@ -from __future__ import annotations - -# mypy: ignore-errors - """ References: https://jmmv.dev/2018/03/shell-readability-strict-mode.html https://stackoverflow.com/questions/13195655/bash-set-x-without-it-being-printed """ +from __future__ import annotations import uuid from typing import Any, Dict, Iterable, List, Optional diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index f0ff586..3293040 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -1,5 +1,3 @@ -from __future__ import annotations - r""" Work in progress. 
The idea is to provide a TMUX queue and a SLURM queue that provide a common high level API, even though functionality might diverge, the @@ -39,6 +37,7 @@ >>> else: >>> print('output does not exist') """ +from __future__ import annotations from typing import Any, Dict, Iterable, List, Optional, Union import ubelt as ub diff --git a/cmd_queue/slurmify.py b/cmd_queue/slurmify.py index e2c68d4..7fb4171 100644 --- a/cmd_queue/slurmify.py +++ b/cmd_queue/slurmify.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 r""" Helper script to wrap a command with sbatch, but using a more srun like syntax. @@ -15,8 +16,6 @@ -- \ python -c 'import sys; print("hello world"); sys.exit(0)' """ - -#!/usr/bin/env python3 import scriptconfig as scfg import ubelt as ub diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 205ea13..043dcd9 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -1,7 +1,3 @@ -from __future__ import annotations - -# mypy: ignore-errors - """ A very simple queue based on tmux and bash @@ -52,6 +48,7 @@ >>> queue.run() """ +from __future__ import annotations import uuid from typing import Any, Dict, Iterable, List, Optional diff --git a/cmd_queue/util/textual_extensions.py b/cmd_queue/util/textual_extensions.py index 7085b5c..0c1410c 100644 --- a/cmd_queue/util/textual_extensions.py +++ b/cmd_queue/util/textual_extensions.py @@ -1,10 +1,6 @@ from __future__ import annotations - -# mypy: ignore-errors from typing import Any -# from typing import Any - try: # from textual.driver import Driver From 63ddc99bce921d13f2f152789806e3ee59c61a78 Mon Sep 17 00:00:00 2001 From: agent Date: Thu, 30 Apr 2026 22:51:54 +0000 Subject: [PATCH 26/27] tests: cover Queue.submit log-flag plumbing to BashJob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Asserts that Queue.submit(..., log=True) lands on BashJob.log and that the rendered command section gets the expected ``() 2>&1 | tee `` wrapper. 
Also covers log=False and the current default (False). Catches a regression class where ``submit`` drops or shadows the ``log`` kwarg without other tests noticing — log files would just silently stop being written. Co-Authored-By: Claude Opus 4.7 --- tests/test_submit_log_flag.py | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tests/test_submit_log_flag.py diff --git a/tests/test_submit_log_flag.py b/tests/test_submit_log_flag.py new file mode 100644 index 0000000..1658824 --- /dev/null +++ b/tests/test_submit_log_flag.py @@ -0,0 +1,83 @@ +""" +Test that ``Queue.submit(..., log=True)`` plumbs the flag through to the +underlying ``BashJob`` and that the finalized script tees the command. + +This is the integration boundary: ``test_bash_variants.py`` covers +``BashJob`` directly (i.e. the renderer), but downstream callers +(kwdagger and others) reach ``BashJob`` only via ``Queue.submit(...)``. +A regression where ``submit`` drops or shadows the ``log`` kwarg would +silently disable tee logging without any other test catching it, which +is exactly the kind of thing this test is here to catch. +""" +import cmd_queue + + +def _command_section(text: str) -> str: + """Return the slice of ``text`` between the ``# command:`` marker and + the ``# after_command:`` marker. Lets the assertion focus on the + actual job command and not bookkeeping lines that may also contain + paths the test doesn't care about. 
+ """ + start = text.find('# command:') + end = text.find('# after_command:') + if start == -1 or end == -1: + return text + return text[start:end] + + +def test_submit_with_log_true_produces_tee(): + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-true', size=1) + job = queue.submit('echo hi', name='job1', log=True) + + assert job.log is True, 'log=True should land on BashJob.log' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + + assert '| tee ' in cmd, ( + 'Queue.submit(log=True) should produce a tee in the rendered ' + 'command section. Got:\n' + cmd + ) + assert str(job.log_fpath) in cmd, ( + 'Tee target must be the BashJob.log_fpath so log inspection ' + 'tools find it. Got:\n' + cmd + ) + + +def test_submit_with_log_false_omits_tee(): + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-false', size=1) + job = queue.submit('echo hi', name='job1', log=False) + + assert job.log is False, 'log=False should land on BashJob.log' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + + assert '| tee ' not in cmd, ( + 'Queue.submit(log=False) must NOT add a tee to the command. ' + 'Got:\n' + cmd + ) + + +def test_submit_log_default_omits_tee(): + """The current ``BashJob`` default is ``log=False`` for backward + compatibility. If a caller does not pass ``log``, no tee should + appear. Tracked here so any default flip is caught explicitly. + """ + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-default', size=1) + job = queue.submit('echo hi', name='job1') + + assert job.log is False, 'BashJob.log default is False' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + assert '| tee ' not in cmd, ( + 'Default Queue.submit (no log kwarg) must NOT tee. 
Got:\n' + cmd + ) + + +if __name__ == '__main__': + test_submit_with_log_true_produces_tee() + test_submit_with_log_false_omits_tee() + test_submit_log_default_omits_tee() + print('All submit log-flag tests passed.') From 44c7e93590fa4338039e57165d507c3c235df6d1 Mon Sep 17 00:00:00 2001 From: agent Date: Fri, 8 May 2026 22:02:01 +0000 Subject: [PATCH 27/27] Add monitor='hybrid' mode: inline UI + attachable tmux side session Press [a] from the live status table to attach (or switch-client) to a detached cmd_queue monitor tmux session running alongside; [q] stops watching while the queue keeps running. The side session is killed when the inline monitor exits. Reorganizes the monitor mode taxonomy so each value names a single intent: 'hybrid' (default) for inline+tmux, 'inline' for current-shell only, 'tmux' for detached-only, 'none' for headless block. 'hybrid' warns and falls back to inline when tmux is unavailable. Wires the [a] keybind into both the simple-rich (rich.Live + cbreak) and textual monitor paths, mirrors the new mode through the slurm backend, and adds plumbing-layer tests that mock the tmux helpers so the suite runs without a tmux server. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 4 + cmd_queue/monitor_app.py | 13 ++ cmd_queue/slurm_queue.py | 104 +++++++++++++--- cmd_queue/tmux_queue.py | 208 +++++++++++++++++++++++++++---- cmd_queue/util/util_tmux.py | 22 ++++ examples/tmux_example.py | 35 ++++-- tests/test_tmux_attach.py | 242 ++++++++++++++++++++++++++++++++++++ 7 files changed, 572 insertions(+), 56 deletions(-) create mode 100644 tests/test_tmux_attach.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6fedd..ee0c83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Added: * generalized the monitor so it can be launched in an independent process and reports errors better. 
+* New `monitor='hybrid'` mode (now the default for tmux and slurm `run()`): renders the live status table inline in the current shell and *also* spawns a detached `cmd_queue monitor` tmux session. Press `[a]` from the inline UI to attach (or `switch-client` when already inside tmux), `[q]` to stop watching while the queue keeps running. The side session is killed when the inline monitor exits. + +### Changed +* `monitor` kwarg accepted values are now `'hybrid' | 'inline' | 'tmux' | 'none'`. `'inline'` reverts to its original pure-current-shell meaning; the `'hybrid'` mode covers the inline+tmux combination. The default is `'hybrid'`, so a no-arg `run()` now spawns an attachable tmux side session whenever tmux is available. ### Fixed: * cwd will now handle failures if the directory doesnt exist in the bash queue diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index b0a3ad0..842fb5b 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -61,11 +61,14 @@ def __init__( self, table_fn: Callable[[], Tuple[Any, bool, Any]], kill_fn: Optional[Callable[[], Any]] = None, + attach_session: Optional[str] = None, **kwargs: Any, ) -> None: self.job_table = JobTable(table_fn) self.kill_fn = kill_fn self.graceful_exit = False + self.attach_session = attach_session + self.attach_requested = False super().__init__(**kwargs) self._title = 'Command Queue' @@ -114,10 +117,20 @@ def demo_table_fn(): async def on_load(self, event: Any) -> None: await self.bind('q', 'quit', 'Quit') + if self.attach_session is not None: + await self.bind('a', 'attach_monitor', 'Attach monitor') async def action_quit(self) -> None: await self.shutdown() + async def action_attach_monitor(self) -> None: + # The actual tmux attach has to happen *after* the textual app + # releases the terminal. Flag it and shut down; the caller + # (TMUXMultiQueue._textual_monitor) checks ``attach_requested`` + # and performs the attach + re-launches the app. 
+ self.attach_requested = True + await self.shutdown() + async def on_mount(self, event: Any) -> None: # from textual.layouts.vertical import VerticalLayout diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 3293040..7c7c800 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -737,7 +737,7 @@ def run( system: bool = False, onfail: str = '', onexit: str = '', - monitor: str = 'inline', + monitor: str = 'hybrid', **kw: Any, ) -> Optional[Any]: """ @@ -745,13 +745,22 @@ def run( Args: monitor (str): where the live status UI runs while - ``block=True``. ``'inline'`` (default) renders in the - current shell. ``'tmux'`` spawns ``cmd_queue monitor`` - in a detached tmux session so the UI survives the - calling shell closing — useful for slurm jobs whose - workers run on the cluster long after the submit shell - might be gone. ``'none'`` skips the UI but still blocks - when ``block=True``. + ``block=True``. + + * ``'hybrid'`` (default): inline UI in the current + shell *and* a detached ``cmd_queue monitor`` tmux + session you can press ``[a]`` to attach to. The + side session is killed when the inline monitor + exits. Falls back to ``'inline'`` when tmux is + unavailable. + * ``'inline'``: renders only in the current shell. + * ``'tmux'``: spawns ``cmd_queue monitor`` only in a + detached tmux session so the UI survives the + calling shell closing — useful for slurm jobs + whose workers run on the cluster long after the + submit shell might be gone. + * ``'none'``: skips the UI but still blocks when + ``block=True``. 
""" if not self.is_available(): raise Exception('slurm backend is not available') @@ -763,6 +772,46 @@ def run( return None if monitor == 'inline': return self.monitor(onfail=onfail, onexit=onexit) + if monitor == 'hybrid': + from cmd_queue.util.util_tmux import tmux as _tmux + + side_session = None + if ub.find_exe('tmux'): + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + side_session = f'cmdq-monitor-{self.queue_id}' + from rich import print as rich_print + + rich_print( + f'[dim]Spawned attachable monitor in tmux session[/dim] ' + f'{side_session} [dim](press [a] to attach)[/dim]' + ) + _tmux.spawn_monitor_session( + session_name=side_session, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + else: + import warnings + + warnings.warn( + "monitor='hybrid' requested but tmux not found; " + 'falling back to inline-only monitor.' + ) + try: + return self.monitor( + onfail=onfail, + onexit=onexit, + side_session=side_session, + ) + finally: + if side_session and _tmux.has_session(side_session): + _tmux.kill_session(side_session, verbose=0) if monitor == 'none': from rich import print as rich_print @@ -821,7 +870,8 @@ def _is_finished() -> bool: ) return None raise ValueError( - f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" + "monitor must be one of 'hybrid', 'inline', 'tmux', 'none'; " + f'got {monitor!r}' ) def monitor( @@ -832,6 +882,7 @@ def monitor( with_textual: str | bool = 'auto', onfail: str = '', onexit: str = '', + side_session: Optional[str] = None, ) -> Optional[Any]: """ Monitor progress until the jobs are done. 
@@ -869,12 +920,15 @@ def monitor( """ import io - import time import pandas as pd - from rich.live import Live from rich.table import Table + from cmd_queue.tmux_queue import ( + _attach_hint_renderable, + _run_live_with_attach, + ) + jobid_history = set() num_at_start = None @@ -1065,13 +1119,29 @@ def _update_agg_state() -> None: agg_state['total'] = len(job_status_table) try: - table, finished = update_status_table() + import sys + + from rich.console import Group + + def _build_renderable() -> Any: + table, finished = update_status_table() + hint = ( + _attach_hint_renderable(side_session) + if side_session + else None + ) + renderable = Group(table, hint) if hint is not None else table + # The slurm Live loop tracks completion via a separate + # variable than tmux; agg_state is updated post-loop. + return renderable, finished, None + refresh_rate = 0.4 - with Live(table, refresh_per_second=4) as live: - while not finished: - time.sleep(refresh_rate) - table, finished = update_status_table() - live.update(table) + use_keys = side_session is not None and sys.stdin.isatty() + _run_live_with_attach( + build_renderable=_build_renderable, + refresh_rate=refresh_rate, + side_session=side_session if use_keys else None, + ) _update_agg_state() except KeyboardInterrupt: from rich.prompt import Confirm diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index 043dcd9..f5d8f38 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -711,7 +711,7 @@ def run( with_textual: str = 'auto', check_other_sessions: Optional[bool] = None, other_session_handler: str = 'auto', - monitor: str = 'inline', + monitor: str = 'hybrid', **kw: Any, ) -> None: """ @@ -727,13 +727,20 @@ def run( monitor (str): Where the live status UI runs while ``block=True``. - * ``'inline'`` (default): renders in the current shell, just - like today. Closing the shell loses the view. 
+ * ``'hybrid'`` (default): renders the inline UI in the + current shell *and* spawns a detached ``cmd_queue + monitor`` tmux session alongside. Press ``[a]`` from + the inline UI to attach (or switch-client) to the + tmux session; ``[q]`` stops watching. The side + session is killed when the inline monitor exits. + * ``'inline'``: renders only in the current shell. No + tmux session is spawned. Closing the shell loses the + view. * ``'tmux'``: spawns ``cmd_queue monitor --manifest=...`` - in a detached tmux session and (when interactive) attaches - the user to it. The current process still blocks until - jobs finish (and runs the post-run cleanup), so detaching - the tmux UI does not return control to the caller. + only in a detached tmux session. The current process + blocks until jobs finish (and runs the post-run + cleanup), so detaching the tmux UI does not return + control to the caller. * ``'none'``: no UI; the call still blocks via a headless state-file poll when ``block=True``. """ @@ -837,6 +844,45 @@ def _dispatch_monitor( onfail=onfail, onexit=onexit, ) + if monitor == 'hybrid': + side_session = None + if ub.find_exe('tmux'): + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + side_session = f'cmdq-monitor-{self.pathid}' + from rich import print as rich_print + + rich_print( + f'[dim]Spawned attachable monitor in tmux session[/dim] ' + f'{side_session} [dim](press [a] to attach)[/dim]' + ) + tmux.spawn_monitor_session( + session_name=side_session, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + else: + import warnings + + warnings.warn( + "monitor='hybrid' requested but tmux not found; " + 'falling back to inline-only monitor.' 
+ ) + try: + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + side_session=side_session, + ) + finally: + if side_session and tmux.has_session(side_session): + tmux.kill_session(side_session, verbose=0) if monitor == 'none': from rich import print as rich_print @@ -895,7 +941,8 @@ def _is_finished() -> bool: self._print_done_summary(agg_state) return agg_state raise ValueError( - f"monitor must be one of 'inline', 'tmux', 'none'; got {monitor!r}" + "monitor must be one of 'hybrid', 'inline', 'tmux', 'none'; " + f'got {monitor!r}' ) def _headless_block_until_done(self, refresh_rate: float = 1.0) -> Any: @@ -953,6 +1000,7 @@ def monitor( with_textual: str | bool = 'auto', onfail: str = '', onexit: str = '', + side_session: Optional[str] = None, ) -> None: """ Monitor progress until the jobs are done. @@ -969,6 +1017,11 @@ def monitor( the user can investigate.") onexit (str): if ``'capture'``, dump tmux pane contents after the queue finishes. + side_session (str | None): name of an attachable tmux + monitor session running alongside the inline UI. When + set, the inline monitor binds ``a`` to attach (or + switch-client) to this session. Caller is responsible + for spawning/cleaning up the session. 
CommandLine: xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:0 @@ -1021,9 +1074,9 @@ def monitor( with_textual = False if with_textual: - self._textual_monitor() + self._textual_monitor(side_session=side_session) else: - self._simple_rich_monitor(refresh_rate) + self._simple_rich_monitor(refresh_rate, side_session=side_session) table, finished, agg_state = self._build_status_table() if onexit == 'capture': self.capture() @@ -1032,7 +1085,7 @@ def monitor( self._print_done_summary(agg_state) return agg_state - def _textual_monitor(self): + def _textual_monitor(self, side_session: Optional[str] = None): from rich import print as rich_print if 0: @@ -1043,12 +1096,23 @@ def _textual_monitor(self): is_running = True while is_running: table_fn = self._build_status_table - app = CmdQueueMonitorApp(table_fn, kill_fn=self.kill) + app = CmdQueueMonitorApp( + table_fn, kill_fn=self.kill, attach_session=side_session + ) app.run() table, finished, agg_state = self._build_status_table() rich_print(table) + if getattr(app, 'attach_requested', False): + # User pressed 'a' inside the textual UI; perform the + # attach (or switch-client) now that textual has released + # the terminal, then re-enter the textual loop. 
+ app.attach_requested = False + if side_session is not None: + _attach_or_switch(side_session) + continue + if app.graceful_exit: is_running = False else: @@ -1174,32 +1238,35 @@ def _build_failed_jobs_renderable(self) -> Any: return renderables[0] return Group(*renderables) - def _build_live_renderable(self): + def _build_live_renderable(self, side_session: Optional[str] = None): from rich.console import Group table, finished, agg_state = self._build_status_table() failed = self._build_failed_jobs_renderable() - renderable = Group(table, failed) if failed is not None else table + hint = _attach_hint_renderable(side_session) if side_session else None + parts = [p for p in (table, failed, hint) if p is not None] + renderable = Group(*parts) if len(parts) > 1 else parts[0] return renderable, finished, agg_state - def _simple_rich_monitor(self, refresh_rate=0.4): - import time - - from rich.live import Live + def _simple_rich_monitor( + self, refresh_rate=0.4, side_session: Optional[str] = None + ): + import sys if 0: print('Kill commands:') for command in self._kill_commands(): print(command) + + use_keys = side_session is not None and sys.stdin.isatty() try: - renderable, finished, agg_state = self._build_live_renderable() - with Live(renderable, refresh_per_second=4) as live: - while not finished: - time.sleep(refresh_rate) - renderable, finished, agg_state = ( - self._build_live_renderable() - ) - live.update(renderable) + _run_live_with_attach( + build_renderable=lambda: self._build_live_renderable( + side_session=side_session, + ), + refresh_rate=refresh_rate, + side_session=side_session if use_keys else None, + ) except KeyboardInterrupt: from rich.prompt import Confirm @@ -1483,6 +1550,95 @@ def _from_manifest(cls, manifest: Dict[str, Any]) -> 'TMUXMultiQueue': return self +def _attach_or_switch(session_name: str) -> None: + """Attach the user's terminal to ``session_name`` (or switch-client + if already inside tmux). 
Thin module-level shim around the static + method so call sites don't need to import the class.""" + tmux.attach_or_switch(session_name) + + +def _attach_hint_renderable(session_name: str) -> Any: + """Footer text shown beneath the live status table when an + attachable side-session exists, so the user can discover the + keybindings without reading the docs.""" + import os + + from rich.text import Text + + verb = 'switch-client' if os.environ.get('TMUX') else 'attach' + return Text.from_markup( + rf'[dim]Press \[a] to {verb} to monitor session ' + f"'{session_name}' • \\[q] to stop watching (queue keeps " + 'running)[/dim]' + ) + + +def _run_live_with_attach( + build_renderable: Any, + refresh_rate: float, + side_session: Optional[str], +) -> None: + """Run a ``rich.live.Live`` loop that also accepts ``a``/``q`` + keypresses when ``side_session`` is provided. + + The loop exits when the renderable's ``finished`` flag goes True + (queue done) or when the user presses ``q``. On ``a`` the Live + display is suspended, the user is attached to the side tmux + session, and the loop resumes after they detach. + """ + import sys + import time + + from rich.live import Live + + if side_session is None: + # Plain path with no input handling — preserves old behavior + # exactly when there is no side session to attach to. 
+ renderable, finished, _ = build_renderable() + with Live(renderable, refresh_per_second=4) as live: + while not finished: + time.sleep(refresh_rate) + renderable, finished, _ = build_renderable() + live.update(renderable) + return + + import select + import termios + import tty + + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + while True: + tty.setcbreak(fd) + attach_requested = False + renderable, finished, _ = build_renderable() + with Live(renderable, refresh_per_second=4) as live: + while not finished: + ready, _, _ = select.select( + [sys.stdin], [], [], refresh_rate + ) + if ready: + ch = sys.stdin.read(1) + if ch in ('a', 'A'): + attach_requested = True + break + if ch in ('q', 'Q'): + return + if ch == '\x03': # Ctrl-C + raise KeyboardInterrupt + renderable, finished, _ = build_renderable() + live.update(renderable) + if not attach_requested: + return + # Restore the terminal so tmux gets a clean tty, attach, + # then loop back into Live with cbreak re-enabled. + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + _attach_or_switch(side_session) + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + + def has_stdin() -> bool: import sys diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index 92c341c..c61e32d 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -234,6 +234,28 @@ def block_with_attach_prompt( finally: termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + @staticmethod + def attach_or_switch(session_name: str) -> None: + """Bring ``session_name`` to the foreground for the user. + + Inside an existing tmux client, this issues ``switch-client`` so + we don't try to nest tmux. Otherwise we ``attach-session`` and + let the foreground process inherit the tty (the user can detach + with the usual binding to come back). 
+ """ + import os + + if os.environ.get('TMUX'): + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + check=False, + ) + else: + ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + check=False, + ) + @staticmethod def list_panes(target_session: str) -> List[Dict[str, str]]: """ diff --git a/examples/tmux_example.py b/examples/tmux_example.py index e2c4d67..cb7784e 100644 --- a/examples/tmux_example.py +++ b/examples/tmux_example.py @@ -1,16 +1,20 @@ """ Demonstrates the ``monitor`` kwarg on the tmux backend. -Three modes are illustrated: +Four monitor modes are illustrated: - * ``monitor='inline'`` (default) — the live status table renders in - the current shell, just like before. Closing the shell loses the - view and (depending on your terminal) may kill the parent process. + * ``monitor='hybrid'`` (default) — the live status table renders in + the current shell *and* a detached ``cmd_queue monitor`` tmux + session is spawned alongside. Press ``[a]`` from the inline UI to + attach (or switch-client) to the tmux session, ``[q]`` to stop + watching. - * ``monitor='tmux'`` — the status table renders in a *separate* - detached tmux session. The original shell still blocks until jobs - finish, but the visible UI (and the post-run cleanup) lives in a - session that survives the shell closing. Run with ``--mode=tmux``. + * ``monitor='inline'`` — only the in-shell live UI; no tmux session + is spawned. + + * ``monitor='tmux'`` — only the detached tmux session, no inline + UI. Useful when you want the visible status table (and post-run + cleanup) to survive the calling shell closing. * ``monitor='none'`` — no live UI; ``run()`` headless-blocks until jobs finish. Useful in non-interactive scripts. The reattach hint @@ -31,10 +35,15 @@ for a clean run, or higher numbers for more failures. CommandLine: - # Default: inline monitor (current shell), one forced failure + # Default (hybrid): inline monitor in this shell + attachable tmux + # session. 
Press [a] in the inline UI to jump into the tmux monitor, + # [q] to stop watching (queue keeps running). python ~/code/cmd_queue/examples/tmux_example.py - # Spawn the monitor in its own tmux session and attach + # Inline-only, no side tmux session + python ~/code/cmd_queue/examples/tmux_example.py --mode=inline + + # Spawn the monitor only in a tmux session (no inline view) python ~/code/cmd_queue/examples/tmux_example.py --mode=tmux # Run silently and reattach manually with `cmd_queue monitor ` @@ -54,9 +63,9 @@ class TmuxExampleConfig(scfg.DataConfig): """ mode = scfg.Value( - 'tmux', + 'hybrid', help='Where the monitor UI runs.', - choices=['inline', 'tmux', 'none'], + choices=['hybrid', 'inline', 'tmux', 'none'], ) name = scfg.Value( 'tmux-example', @@ -211,6 +220,6 @@ def submit_sleep_chain(base_name, total_sleep, depends=None, fail=False): if __name__ == '__main__': """ CommandLine: - python ~/code/cmd_queue/examples/tmux_example.py --mode=tmux + python ~/code/cmd_queue/examples/tmux_example.py """ main() diff --git a/tests/test_tmux_attach.py b/tests/test_tmux_attach.py new file mode 100644 index 0000000..9dd168a --- /dev/null +++ b/tests/test_tmux_attach.py @@ -0,0 +1,242 @@ +""" +Tests for the ``monitor`` argument on ``Queue.run()`` for the tmux and +slurm backends — specifically the new ``'hybrid'`` mode. + +The full end-to-end behavior (rich.Live + cbreak + tmux attach) requires +an interactive TTY and a live tmux server, so these tests stay at the +plumbing layer: + +* ``monitor='hybrid'`` (the default) on an inline-monitor run spawns the + side ``cmd_queue monitor`` tmux session and tears it down afterwards. +* ``monitor='inline'`` leaves the existing inline-only behavior intact + (no side spawn). +* ``monitor='hybrid'`` falls back gracefully when tmux is missing. +* The renderable hint and the textual app expose the right keybinding. + +The tmux helpers are monkeypatched so the tests run without a tmux server. 
+""" +from __future__ import annotations + +from typing import Any, Dict, List + +import pytest + + +def _patch_tmux_helpers(monkeypatch: pytest.MonkeyPatch) -> Dict[str, List[Any]]: + """Replace the tmux helper static methods with recorders. + + Returns a dict of call-log lists keyed by helper name so each test + can assert on what the run() path triggered. + """ + from cmd_queue.util import util_tmux + + calls: Dict[str, List[Any]] = { + 'spawn': [], + 'kill': [], + 'has': [], + 'attach_or_switch': [], + } + + def fake_spawn( + session_name: str, + manifest_path: Any, + attach: bool = True, + verbose: int = 0, + extra_args: Any = None, + ) -> Dict[str, Any]: + calls['spawn'].append( + { + 'session_name': session_name, + 'manifest_path': str(manifest_path), + 'attach': attach, + 'extra_args': list(extra_args or []), + } + ) + return {'session_name': session_name, 'attach_command': 'noop'} + + def fake_kill(session_name: str, verbose: int = 3) -> None: + calls['kill'].append(session_name) + + def fake_has(session_name: str) -> bool: + calls['has'].append(session_name) + # Pretend the session exists between spawn and kill so the + # finally-clause actually exercises the kill path. 
+ return True + + def fake_attach(session_name: str) -> None: + calls['attach_or_switch'].append(session_name) + + monkeypatch.setattr( + util_tmux.tmux, 'spawn_monitor_session', staticmethod(fake_spawn) + ) + monkeypatch.setattr( + util_tmux.tmux, 'kill_session', staticmethod(fake_kill) + ) + monkeypatch.setattr( + util_tmux.tmux, 'has_session', staticmethod(fake_has) + ) + monkeypatch.setattr( + util_tmux.tmux, 'attach_or_switch', staticmethod(fake_attach) + ) + return calls + + +def _make_tmux_queue(tmp_path): + from cmd_queue.tmux_queue import TMUXMultiQueue + + queue = TMUXMultiQueue(size=1, name='tmux-attach-test', dpath=tmp_path) + queue.submit('true') + return queue + + +def test_hybrid_mode_spawns_and_kills_side_session(monkeypatch, tmp_path): + """With ``monitor='hybrid'`` the dispatcher must spawn the side + session before invoking ``self.monitor()`` and kill it afterwards + (so we don't leak tmux sessions per run).""" + calls = _patch_tmux_helpers(monkeypatch) + monkeypatch.setattr('ubelt.find_exe', lambda name: f'/usr/bin/{name}') + queue = _make_tmux_queue(tmp_path) + + monitor_calls: List[Dict[str, Any]] = [] + + def fake_monitor(self, **kwargs): + # Record what the dispatcher passed and assert the session was + # already spawned by this point. 
+ monitor_calls.append(kwargs) + return {'status': 'done'} + + monkeypatch.setattr( + 'cmd_queue.tmux_queue.TMUXMultiQueue.monitor', fake_monitor + ) + + queue._dispatch_monitor( + monitor='hybrid', + manifest_path=tmp_path / 'manifest.json', + onfail='kill', + onexit='', + with_textual='auto', + ) + + assert len(calls['spawn']) == 1, 'side session must be spawned exactly once' + spawn = calls['spawn'][0] + assert spawn['session_name'].startswith('cmdq-monitor-') + assert '--onfail=kill' in spawn['extra_args'] + assert spawn['attach'] is False, ( + 'spawn_monitor_session(attach=False) — the inline path takes ' + 'over the foreground separately via the [a] keybind' + ) + + assert len(monitor_calls) == 1 + assert monitor_calls[0]['side_session'] == spawn['session_name'] + + assert calls['kill'] == [spawn['session_name']], ( + 'side session must be killed in the dispatcher finally-clause' + ) + + +def test_inline_mode_does_not_spawn(monkeypatch, tmp_path): + """``monitor='inline'`` is the explicit opt-out: no side session + should be created and ``monitor()`` should be invoked without a + ``side_session`` argument (so the inline UI keeps its old shape).""" + calls = _patch_tmux_helpers(monkeypatch) + monkeypatch.setattr('ubelt.find_exe', lambda name: f'/usr/bin/{name}') + queue = _make_tmux_queue(tmp_path) + + seen: List[Dict[str, Any]] = [] + + def fake_monitor(self, **kwargs): + seen.append(kwargs) + return None + + monkeypatch.setattr( + 'cmd_queue.tmux_queue.TMUXMultiQueue.monitor', fake_monitor + ) + queue._dispatch_monitor( + monitor='inline', + manifest_path=tmp_path / 'manifest.json', + onfail='kill', + onexit='', + with_textual='auto', + ) + + assert calls['spawn'] == [], "inline mode must not spawn a side session" + assert calls['kill'] == [], 'no kill if nothing was spawned' + assert 'side_session' not in seen[0], ( + 'inline path goes through the legacy monitor() signature, ' + 'with no side_session kwarg' + ) + + +def 
def test_attach_hint_renderable_mentions_session():
    """Check that the attach hint advertises the keys and the session.

    The footer text under the live status table must contain the
    ``[a]`` and ``[q]`` markers and the session name; otherwise users
    won't discover the feature.
    """
    from cmd_queue.tmux_queue import _attach_hint_renderable

    session_name = 'cmdq-monitor-foo'
    # ``.plain`` yields the hint's text with the rich markup stripped.
    plain_text = _attach_hint_renderable(session_name).plain
    for expected in ('[a]', '[q]', session_name):
        assert expected in plain_text
if __name__ == '__main__':
    # Running this module directly executes its tests verbosely and
    # passes pytest's return value on as the process exit status.
    import sys

    exit_status = pytest.main([__file__, '-v'])
    sys.exit(exit_status)