diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3e7a18b..1026d4a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,30 +94,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -164,30 +170,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -234,30 +246,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -304,30 +322,36 @@ stages: - 'echo "Installing helpers: setuptools"' - python -m uv pip install setuptools>=0.8 setuptools_scm wheel build -U - 'echo "Installing helpers: tomli and pkginfo"' - - python -m uv pip install tomli pkginfo + - python -m uv pip install tomli pkginfo packaging - |- export WHEEL_FPATH=$(python -c "if 1: import pathlib + from packaging import tags + from packaging.utils import parse_wheel_filename dist_dpath = pathlib.Path('dist') - candidates = list(dist_dpath.glob('cmd_queue*.whl')) - candidates += list(dist_dpath.glob('cmd_queue*.tar.gz')) - fpath = sorted(candidates)[-1] + wheels = sorted(dist_dpath.glob('cmd_queue*.whl')) + if wheels: + sys_tags = set(tags.sys_tags()) + matching = [] + for w in wheels: + try: + _, _, _, wheel_tags = parse_wheel_filename(w.name) + except Exception: + continue + if any(t in sys_tags for t in wheel_tags): + matching.append(w) + fpath = sorted(matching or wheels)[-1] + else: + sdists = sorted(dist_dpath.glob('cmd_queue*.tar.gz')) + if not sdists: + raise SystemExit('No wheel artifacts found in wheelhouse') + fpath = sdists[-1] print(str(fpath).replace(chr(92), chr(47))) ") - - |- - export MOD_VERSION=$(python -c "if 1: - from pkginfo import Wheel, SDist - import pathlib - fpath = '$WHEEL_FPATH' - cls = Wheel if fpath.endswith('.whl') else SDist - item = cls(fpath) - print(item.version) - ") - echo "WHEEL_FPATH=$WHEEL_FPATH" - echo "INSTALL_EXTRAS=$INSTALL_EXTRAS" - echo "UV_RESOLUTION=$UV_RESOLUTION" - - echo "MOD_VERSION=$MOD_VERSION" - - python -m pip install --prefer-binary "cmd_queue[$INSTALL_EXTRAS]==$MOD_VERSION" -f dist + - python -m pip install --prefer-binary "${WHEEL_FPATH}[${INSTALL_EXTRAS}]" - echo "Install finished." 
- echo "Creating test sandbox directory" - export WORKSPACE_DNAME="sandbox" @@ -359,29 +383,6 @@ test/sdist/minimal-loose/cp314-linux-x86_64: image: python:3.14 needs: - build/sdist -build/cp39-linux-x86_64: - <<: *build_wheel_template - image: python:3.9 -test/minimal-loose/cp39-linux-x86_64: - <<: *test_minimal-loose_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/full-loose/cp39-linux-x86_64: - <<: *test_full-loose_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/minimal-strict/cp39-linux-x86_64: - <<: *test_minimal-strict_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 -test/full-strict/cp39-linux-x86_64: - <<: *test_full-strict_template - image: python:3.9 - needs: - - build/cp39-linux-x86_64 build/cp310-linux-x86_64: <<: *build_wheel_template image: python:3.10 @@ -507,6 +508,9 @@ lint: - python -m pip install pip uv -U - python -m uv pip install -r requirements/linting.txt - ./run_linter.sh + - python -m pip install ty + - pip install -r requirements/runtime.txt + - ty check ./cmd_queue allow_failure: true gpgsign/wheels: <<: *common_template @@ -531,17 +535,16 @@ gpgsign/wheels: - export GPG_EXECUTABLE=gpg - export GPG_KEYID=$(cat dev/public_gpg_key) - echo "GPG_KEYID = $GPG_KEYID" - # Decrypt and import GPG Keys / trust - # note the variable pointed to by VARNAME_CI_SECRET is a protected variables only available on main and release branch - - source dev/secrets_configuration.sh - - CI_SECRET=${!VARNAME_CI_SECRET} - $GPG_EXECUTABLE --version - openssl version - $GPG_EXECUTABLE --list-keys - # note CI_KITWARE_SECRET is a protected variables only available on main and release branch - - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import - - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust - - CIS=$CI_SECRET 
openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import + - echo "Importing GPG keys from CI secrets" + - printf '%s' "$GPG_PUBLIC_KEY_B64" | base64 -d | $GPG_EXECUTABLE --import + - printf '%s' "$GPG_OWNER_TRUST_B64" | base64 -d | $GPG_EXECUTABLE --import-ownertrust + - printf '%s' "$GPG_SECRET_SIGNING_SUBKEY_B64" | base64 -d | $GPG_EXECUTABLE --import + - "IMPORTED_FPR=$($GPG_EXECUTABLE --list-keys --with-colons \"$GPG_KEYID\" | awk -F: '/^fpr/ { print $10; exit }')" + - '[[ "$IMPORTED_FPR" == "$GPG_KEYID" ]] || { echo "ERROR: fingerprint mismatch: $IMPORTED_FPR != $GPG_KEYID"; exit 1; }' + - 'echo "GPG fingerprint verified: $IMPORTED_FPR"' - GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user $GPG_KEYID" - |- WHEEL_PATHS=(dist/*.whl dist/*.tar.gz) @@ -562,8 +565,6 @@ gpgsign/wheels: needs: - job: build/sdist artifacts: true - - job: build/cp39-linux-x86_64 - artifacts: true - job: build/cp310-linux-x86_64 artifacts: true - job: build/cp311-linux-x86_64 @@ -605,7 +606,6 @@ deploy/wheels: # do sed twice to handle the case of https clone with and without a read token URL_HOST=$(git remote get-url origin | sed -e 's|https\?://.*@||g' | sed -e 's|https\?://||g' | sed -e 's|git@||g' | sed -e 's|:|/|g') source dev/secrets_configuration.sh - CI_SECRET=${!VARNAME_CI_SECRET} PUSH_TOKEN=${!VARNAME_PUSH_TOKEN} echo "URL_HOST = $URL_HOST" # A git config user name and email is required. Set if needed. diff --git a/CHANGELOG.md b/CHANGELOG.md index 4121c7e..ee0c83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## Version 0.3.0 - Unreleased +### Added: +* generalized the monitor so it can be launched in an independent process and reports errors better. 
+* New `monitor='hybrid'` mode (now the default for tmux and slurm `run()`): renders the live status table inline in the current shell and *also* spawns a detached `cmd_queue monitor` tmux session. Press `[a]` from the inline UI to attach (or `switch-client` when already inside tmux), `[q]` to stop watching while the queue keeps running. The side session is killed when the inline monitor exits. + +### Changed +* `monitor` kwarg accepted values are now `'hybrid' | 'inline' | 'tmux' | 'none'`. `'inline'` reverts to its original pure-current-shell meaning; the `'hybrid'` mode covers the inline+tmux combination. The default is `'hybrid'`, so a no-arg `run()` now spawns an attachable tmux side session whenever tmux is available. + ### Fixed: * cwd will now handle failures if the directory doesnt exist in the bash queue * general improvements to bash script construction with per-job preamble commands @@ -14,7 +21,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed * deprecate `header_commands` for `preamble` -* Dropped support for 3.8 +* Dropped support for 3.8 and 3.9 * Transition from stubs to type annotations. diff --git a/cmd_queue/__init__.py b/cmd_queue/__init__.py index 23987ae..5dc31d6 100644 --- a/cmd_queue/__init__.py +++ b/cmd_queue/__init__.py @@ -313,7 +313,8 @@ 'base_queue': ['Queue'], } from cmd_queue import base_queue - -from cmd_queue.base_queue import (Queue,) +from cmd_queue.base_queue import ( + Queue, +) __all__ = ['Queue', 'base_queue'] diff --git a/cmd_queue/airflow_queue.py b/cmd_queue/airflow_queue.py index e2280ed..adff444 100644 --- a/cmd_queue/airflow_queue.py +++ b/cmd_queue/airflow_queue.py @@ -1,6 +1,3 @@ -from __future__ import annotations -# mypy: ignore-errors - r"""Airflow backend. 
Note: @@ -28,11 +25,12 @@ >>> print((queue.dags_dpath / 'cmdq_airflow_mwe.py').exists()) True """ +from __future__ import annotations import contextlib import os import time import uuid -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Iterable, List, Optional import ubelt as ub @@ -43,6 +41,7 @@ class AirflowJob(base_queue.Job): """ Represents a airflow job that hasn't been executed yet """ + def __init__( self, command: str, @@ -61,7 +60,7 @@ def __init__( if name is None: name = 'job-' + str(uuid.uuid4()) if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.unused_kwargs = kwargs self.command = command self.name = name @@ -132,8 +131,14 @@ def __init__( self.name = name stamp = time.strftime('%Y%m%dT%H%M%S') self.unused_kwargs = kwargs - self.queue_id = name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] - base_dpath = ub.Path(dpath) if dpath is not None else ub.Path.appdir('cmd_queue') / 'airflow' + self.queue_id = ( + name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] + ) + base_dpath = ( + ub.Path(dpath) + if dpath is not None + else ub.Path.appdir('cmd_queue') / 'airflow' + ) self.dpath = (base_dpath / self.queue_id).ensuredir() self.dags_dpath = (self.dpath / 'dags').ensuredir() self.log_dpath = (self.dpath / 'logs').ensuredir() @@ -142,7 +147,11 @@ def __init__( self.preamble = [] self.all_depends = None self.job_info_dpath = self.dpath / 'job_info' - home = ub.Path(airflow_home) if airflow_home is not None else (self.dpath / 'airflow_home') + home = ( + ub.Path(airflow_home) + if airflow_home is not None + else (self.dpath / 'airflow_home') + ) self.airflow_home = home.ensuredir() if preamble is not None: self.add_preamble_command(preamble) @@ -188,7 +197,7 @@ def _airflow_env(self): env['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False' env.setdefault( 'AIRFLOW__DATABASE__SQL_ALCHEMY_CONN', - f"sqlite:///{self.airflow_home / 'airflow.db'}", + 
f'sqlite:///{self.airflow_home / "airflow.db"}', ) return env @@ -213,16 +222,20 @@ def run(self, block: bool = True, system: bool = False) -> None: env = self._airflow_env() detach = not block if detach: - raise NotImplementedError('Non-blocking airflow runs are not implemented yet') + raise NotImplementedError( + 'Non-blocking airflow runs are not implemented yet' + ) with self._patched_env(env): - from airflow.utils import db + import contextlib + import sys + + from airflow.models.dag import DagModel from airflow.models.dagbag import DagBag - from airflow.models.serialized_dag import DagVersion from airflow.models.dagbundle import DagBundleModel - from airflow.models.dag import DagModel + from airflow.models.serialized_dag import DagVersion + from airflow.utils import db from airflow.utils.session import create_session - import sys - import contextlib + if hasattr(db, 'resetdb'): db.resetdb() elif hasattr(db, 'check_and_run_migrations'): @@ -231,10 +244,16 @@ def run(self, block: bool = True, system: bool = False) -> None: db.upgradedb() else: db.initdb() - dag_bag = DagBag(dag_folder=os.fspath(self.dags_dpath), include_examples=False, safe_mode=False) + dag_bag = DagBag( + dag_folder=os.fspath(self.dags_dpath), + include_examples=False, + safe_mode=False, + ) dag = dag_bag.get_dag(self.name) if dag is None: - raise RuntimeError(f'Could not load DAG {self.name} from {self.dags_dpath}') + raise RuntimeError( + f'Could not load DAG {self.name} from {self.dags_dpath}' + ) # Airflow 3 requires DAG bundle versioning unless explicitly disabled. 
if not getattr(dag, 'disable_bundle_versioning', False): dag.disable_bundle_versioning = True @@ -245,7 +264,11 @@ def run(self, block: bool = True, system: bool = False) -> None: session.flush() dag_model = session.get(DagModel, dag.dag_id) if dag_model is None: - dag_model = DagModel(dag_id=dag.dag_id, fileloc=dag.fileloc, bundle_name=bundle_name) + dag_model = DagModel( + dag_id=dag.dag_id, + fileloc=dag.fileloc, + bundle_name=bundle_name, + ) else: dag_model.fileloc = dag.fileloc dag_model.bundle_name = bundle_name @@ -256,7 +279,10 @@ def run(self, block: bool = True, system: bool = False) -> None: # be closed unexpectedly. Ensure Airflow writes to the real stdout/ # stderr streams to avoid "I/O operation on closed file" errors # during tests. - with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__): + with ( + contextlib.redirect_stdout(sys.__stdout__), + contextlib.redirect_stderr(sys.__stderr__), + ): dag.test() def read_state(self): @@ -268,17 +294,22 @@ def read_state(self): """ env = self._airflow_env() with self._patched_env(env): - from airflow.utils.session import create_session from airflow.models.dagrun import DagRun from airflow.models.taskinstance import TaskInstance + from airflow.utils.session import create_session from sqlalchemy import select + try: from airflow.utils.state import TaskInstanceState + success_state = TaskInstanceState.SUCCESS failed_state = TaskInstanceState.FAILED skipped_state = TaskInstanceState.SKIPPED except Exception: # pragma: no cover - from airflow.utils.state import State as TaskInstanceState # type: ignore + from airflow.utils.state import ( + State as TaskInstanceState, # type: ignore + ) + success_state = TaskInstanceState.SUCCESS failed_state = TaskInstanceState.FAILED skipped_state = TaskInstanceState.SKIPPED @@ -311,23 +342,22 @@ def read_state(self): summary['status'] = getattr(dagrun, 'state', 'unknown') summary['run_id'] = dagrun.run_id - ti_stmt = ( - 
select(TaskInstance.state) - .where( - TaskInstance.dag_id == dagrun.dag_id, - TaskInstance.run_id == dagrun.run_id, - ) + ti_stmt = select(TaskInstance.state).where( + TaskInstance.dag_id == dagrun.dag_id, + TaskInstance.run_id == dagrun.run_id, ) states = list(session.scalars(ti_stmt)) passed = sum(state == success_state for state in states) failed = sum(state == failed_state for state in states) skipped = sum(state == skipped_state for state in states) - summary.update({ - 'total': len(states), - 'passed': passed, - 'failed': failed, - 'skipped': skipped, - }) + summary.update( + { + 'total': len(states), + 'passed': passed, + 'failed': failed, + 'skipped': skipped, + } + ) return summary def finalize_text(self) -> str: @@ -337,7 +367,7 @@ def finalize_text(self) -> str: topo_jobs = [self.named_jobs[n] for n in nx.topological_sort(graph)] header = ub.codeblock( - f''' + f""" from airflow import DAG from datetime import timezone from datetime import datetime as datetime_cls @@ -350,7 +380,7 @@ def finalize_text(self) -> str: tags=['cmd_queue'], ) jobs = dict() - ''' + """ ) parts = [header] for job in topo_jobs: @@ -359,7 +389,9 @@ def finalize_text(self) -> str: for job in topo_jobs: for dep in job.depends or []: if dep is not None: - parts.append(f'jobs[{job.name!r}].set_upstream(jobs[{dep.name!r}])') + parts.append( + f'jobs[{job.name!r}].set_upstream(jobs[{dep.name!r}])' + ) # if depends: # for dep in depends: @@ -398,7 +430,8 @@ def submit(self, command: str, **kwargs: Any) -> AirflowJob: depends = [depends] depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] job = AirflowJob(command, depends=depends, **kwargs) self.jobs.append(job) self.num_real_jobs += 1 @@ -432,9 +465,10 @@ def print_commands( code = self.finalize_text() if style == 'rich': + from rich.console import Console from rich.panel import Panel from rich.syntax import Syntax - from rich.console import Console + console = Console() 
console.print(Panel(Syntax(code, 'python'), title=str(self.fpath))) # console.print(Syntax(code, 'bash')) @@ -461,11 +495,14 @@ def demo() -> None: from cmd_queue.airflow_queue import * # NOQA demo() """ - from airflow import DAG - from datetime import timezone from datetime import datetime as datetime_cls + from datetime import timezone + + from airflow import DAG from airflow.operators.bash import BashOperator - now = datetime_cls.utcnow().replace(tzinfo=timezone.utc) + + now = datetime_cls.now(timezone.utc) + # now = datetime_cls.utcnow().replace(tzinfo=timezone.utc) dag = DAG( 'mycustomdag', start_date=now, @@ -473,9 +510,11 @@ def demo() -> None: tags=['example'], ) t1 = BashOperator(task_id='task1', bash_command='date', dag=dag) - t2 = BashOperator(task_id='task2', bash_command='echo hi 1 && true', dag=dag) + t2 = BashOperator( + task_id='task2', bash_command='echo hi 1 && true', dag=dag + ) t2.set_upstream(t1) - dag.run(verbose=True, local=True) + dag.run(verbose=True, local=True) # type: ignore if __name__ == '__main__': diff --git a/cmd_queue/base_queue.py b/cmd_queue/base_queue.py index e7af2b8..b5dd237 100644 --- a/cmd_queue/base_queue.py +++ b/cmd_queue/base_queue.py @@ -1,23 +1,20 @@ from __future__ import annotations -# mypy: ignore-errors - from typing import Any, Dict, Iterable, List, Optional, Union import ubelt as ub -class DuplicateJobError(KeyError): - ... +class DuplicateJobError(KeyError): ... -class UnknownBackendError(KeyError): - ... +class UnknownBackendError(KeyError): ... class Job(ub.NiceRepr): """ Base class for a job """ + def __init__( self, command: Optional[str] = None, @@ -27,7 +24,7 @@ def __init__( ) -> None: # This is unused, should the slurm and bash job reuse this? 
if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.name = name self.command = command self.depends = depends @@ -130,7 +127,9 @@ def sync(self) -> Queue: """ graph = self._dependency_graph() # Find the jobs that nobody depends on - sink_jobs = [graph.nodes[n]['job'] for n, d in graph.out_degree if d == 0] + sink_jobs = [ + graph.nodes[n]['job'] for n, d in graph.out_degree if d == 0 + ] # All new jobs must depend on these jobs self.all_depends = sink_jobs return self @@ -142,12 +141,21 @@ def write(self) -> Any: """ import os import stat + text = self.finalize_text() self.fpath.parent.ensuredir() self.fpath.write_text(text) - os.chmod(self.fpath, ( - stat.S_IXUSR | stat.S_IXGRP | stat.S_IRUSR | - stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)) + os.chmod( + self.fpath, + ( + stat.S_IXUSR + | stat.S_IXGRP + | stat.S_IRUSR + | stat.S_IWUSR + | stat.S_IRGRP + | stat.S_IWGRP + ), + ) return self.fpath def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: @@ -170,7 +178,9 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: if isinstance(command, str): name = kwargs.get('name', None) if name is None: - name = kwargs['name'] = self.name + '-job-{}'.format(self.num_real_jobs) + name = kwargs['name'] = self.name + '-job-{}'.format( + self.num_real_jobs + ) # TODO: make sure name is path safe. 
if ':' in name: @@ -193,9 +203,14 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: try: depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] except Exception: - print('self.named_jobs = {}'.format(ub.urepr(self.named_jobs, nl=1))) + print( + 'self.named_jobs = {}'.format( + ub.urepr(self.named_jobs, nl=1) + ) + ) raise job = serial_queue.BashJob(command, depends=depends, **kwargs) elif isinstance(command, Job): @@ -219,10 +234,13 @@ def submit(self, command: Union[str, Job], **kwargs: Any) -> Job: @classmethod def _backend_classes(cls): - from cmd_queue import tmux_queue - from cmd_queue import serial_queue - from cmd_queue import slurm_queue - from cmd_queue import airflow_queue + from cmd_queue import ( + airflow_queue, + serial_queue, + slurm_queue, + tmux_queue, + ) + lut = { 'serial': serial_queue.SerialQueue, 'tmux': tmux_queue.TMUXMultiQueue, @@ -252,17 +270,21 @@ def create(cls, backend: str = 'serial', **kwargs: Any) -> Queue: """ if backend == 'serial': from cmd_queue import serial_queue + kwargs.pop('size', None) self = serial_queue.SerialQueue(**kwargs) elif backend == 'tmux': from cmd_queue import tmux_queue + self = tmux_queue.TMUXMultiQueue(**kwargs) elif backend == 'slurm': from cmd_queue import slurm_queue + kwargs.pop('size', None) self = slurm_queue.SlurmQueue(**kwargs) elif backend == 'airflow': from cmd_queue import airflow_queue + kwargs.pop('size', None) self = airflow_queue.AirflowQueue(**kwargs) else: @@ -279,7 +301,7 @@ def write_network_text( try: import rich as rich_mod except ImportError: - rich_mod = None + rich_mod = None # type: ignore if rich == 'auto': rich = rich_mod is not None @@ -289,20 +311,26 @@ def write_network_text( print_ = print import networkx as nx + graph = self._dependency_graph() if reduced: print_('\nGraph (reduced):') try: reduced_graph = nx.transitive_reduction(graph) - nx.write_network_text(reduced_graph, path=print_, end='', - 
vertical_chains=vertical_chains) + nx.write_network_text( + reduced_graph, + path=print_, + end='', + vertical_chains=vertical_chains, + ) except Exception as ex: print_(f'ex={ex}') print_('\n') else: print_('\nGraph:') - nx.write_network_text(graph, path=print_, end='', - vertical_chains=vertical_chains) + nx.write_network_text( + graph, path=print_, end='', vertical_chains=vertical_chains + ) def print_commands( self, @@ -340,17 +368,23 @@ def print_commands( colors = kwargs.get('colors', None) if colors is not None: ub.schedule_deprecation( - 'cmd_queue', 'colors', 'arg', + 'cmd_queue', + 'colors', + 'arg', migration='use style="plain" | "rich" | "colors" instead', - deprecate='now') + deprecate='now', + ) if not colors: style = 'plain' with_rich = kwargs.get('with_rich', None) if with_rich is not None: ub.schedule_deprecation( - 'cmd_queue', 'with_rich', 'arg', + 'cmd_queue', + 'with_rich', + 'arg', migration='use use style="plain" | "rich" | "colors" instead', - deprecate='now') + deprecate='now', + ) if with_rich: style = 'rich' if style == 'auto': @@ -358,16 +392,19 @@ def print_commands( # style = 'rich' if colors else 'plain' from cmd_queue.util import util_tags + exclude_tags = util_tags.Tags.coerce(exclude_tags) code = self.finalize_text( with_status=with_status, with_gaurds=with_gaurds, with_locks=with_locks, - exclude_tags=exclude_tags) + exclude_tags=exclude_tags, + ) if style == 'rich': - from rich.syntax import Syntax - from rich.panel import Panel from rich.console import Console + from rich.panel import Panel + from rich.syntax import Syntax + console = Console() console.print(Panel(Syntax(code, 'bash'), title=str(self.fpath))) elif style == 'colors': @@ -381,19 +418,25 @@ def print_commands( def rprint(self, **kwargs: Any) -> None: ub.schedule_deprecation( - 'cmd_queue', name='rprint', type='arg', + 'cmd_queue', + name='rprint', + type='arg', migration='print_commands', ) self.print_commands(**kwargs) - def print_graph(self, reduced: bool = True, 
vertical_chains: bool = False) -> None: + def print_graph( + self, reduced: bool = True, vertical_chains: bool = False + ) -> None: """ Renders the dependency graph to an "network text" Args: reduced (bool): if True only show the implicit dependency forest """ - self.write_network_text(reduced=reduced, vertical_chains=vertical_chains) + self.write_network_text( + reduced=reduced, vertical_chains=vertical_chains + ) def _dependency_graph(self) -> Any: """ @@ -414,10 +457,13 @@ def _dependency_graph(self) -> Any: >>> self.print_graph() """ import networkx as nx + graph = nx.DiGraph() duplicate_names = ub.find_duplicates(self.jobs, key=lambda x: x.name) if duplicate_names: - print('duplicate_names = {}'.format(ub.urepr(duplicate_names, nl=1))) + print( + 'duplicate_names = {}'.format(ub.urepr(duplicate_names, nl=1)) + ) raise Exception('Job names must be unique') for index, job in enumerate(self.jobs): @@ -429,20 +475,29 @@ def _dependency_graph(self) -> Any: graph.add_edge(dep.name, job.name) return graph - def monitor(self) -> None: + def monitor( + self, + refresh_rate: float = 0.4, + with_textual: str | bool = 'auto', + onfail: str = '', + onexit: str = '', + ) -> None: print('monitor not implemented') def _coerce_style( self, style: str = 'auto', with_rich: Optional[bool] = None, - colors: bool = True, + colors: bool | int = True, ) -> str: # Helper if with_rich is not None: ub.schedule_deprecation( - 'cmd_queue', 'with_rich', 'arg', - migration='use style="rich" instead') + 'cmd_queue', + 'with_rich', + 'arg', + migration='use style="rich" instead', + ) if with_rich: style = 'rich' if style == 'auto': diff --git a/cmd_queue/cli_boilerplate.py b/cmd_queue/cli_boilerplate.py index cf11843..5c9aca9 100644 --- a/cmd_queue/cli_boilerplate.py +++ b/cmd_queue/cli_boilerplate.py @@ -1,6 +1,3 @@ -from __future__ import annotations -# mypy: ignore-errors - """ This file defines a helper scriptconfig base config that can be used to help make cmd_queue CLIs so cmd_queue 
options are standardized and present at the @@ -98,15 +95,15 @@ >>> print('----------------') >>> my_cli_main(cmdline=0, run=1, print_queue=0, print_commands=0) """ +from __future__ import annotations +import typing from typing import Any, Dict, Optional import scriptconfig as scfg import ubelt as ub - -__docstubs__ = """ -import cmd_queue -""" +if typing.TYPE_CHECKING: + import cmd_queue class CMDQueueConfig(scfg.DataConfig): @@ -155,40 +152,91 @@ class CMDQueueConfig(scfg.DataConfig): slurm_options = scfg.Value(None, help='if the backend is slurm, provide a YAML dictionary for things like partition / etc...', group='cmd-queue') """ - run = scfg.Value(False, isflag=True, help='if False, only prints the commands, otherwise executes them', group='cmd-queue') - - backend = scfg.Value('tmux', help=('The cmd_queue backend. Can be tmux, slurm, or serial'), group='cmd-queue') - - queue_name = scfg.Value(None, help='overwrite the default queue name', group='cmd-queue') - - print_commands = scfg.Value('auto', isflag=True, help='enable / disable rprint before exec', group='cmd-queue') - - print_queue = scfg.Value('auto', isflag=True, help='print the cmd queue DAG', group='cmd-queue') - with_textual = scfg.Value('auto', isflag=True, help='setting for cmd-queue monitoring', group='cmd-queue') - - other_session_handler = scfg.Value('ask', help='for tmux backend only. How to handle conflicting sessions. Can be ask, kill, or ignore, or auto', group='cmd-queue') - - virtualenv_cmd = scfg.Value(None, type=str, help=ub.paragraph( - ''' + run = scfg.Value( + False, + isflag=True, + help='if False, only prints the commands, otherwise executes them', + group='cmd-queue', + ) + + backend = scfg.Value( + 'tmux', + help=('The cmd_queue backend. 
Can be tmux, slurm, or serial'), + group='cmd-queue', + ) + + monitor = scfg.Value( + 'inline', + help=('where the live status UI runs while the queue executes'), + group='cmd-queue', + choices=['inline', 'tmux'], + ) + + queue_name = scfg.Value( + None, help='overwrite the default queue name', group='cmd-queue' + ) + + print_commands = scfg.Value( + 'auto', + isflag=True, + help='enable / disable rprint before exec', + group='cmd-queue', + ) + + print_queue = scfg.Value( + 'auto', isflag=True, help='print the cmd queue DAG', group='cmd-queue' + ) + + with_textual = scfg.Value( + 'auto', + isflag=True, + help='setting for cmd-queue monitoring', + group='cmd-queue', + ) + + other_session_handler = scfg.Value( + 'ask', + help='for tmux backend only. How to handle conflicting sessions. Can be ask, kill, or ignore, or auto', + group='cmd-queue', + ) + + virtualenv_cmd = scfg.Value( + None, + type=str, + help=ub.paragraph( + """ Command to start the appropriate virtual environment if your bashrc - does not start it by default.'''), group='cmd-queue') + does not start it by default.""" + ), + group='cmd-queue', + ) + # TODO: add global preamble argument - tmux_workers = scfg.Value(8, help='number of tmux workers in the queue for the tmux backend', group='cmd-queue') + tmux_workers = scfg.Value( + 8, + help='number of tmux workers in the queue for the tmux backend', + group='cmd-queue', + ) + - slurm_options = scfg.Value(None, help=ub.paragraph( - ''' + slurm_options = scfg.Value( + None, + help=ub.paragraph( + """ if the backend is slurm, provide a YAML dictionary for things like partition / etc...
- '''), group='cmd-queue') + """ + ), + group='cmd-queue', + ) def __post_init__(self) -> None: from cmd_queue.util.util_yaml import Yaml + self.slurm_options = Yaml.coerce(self.slurm_options) or {} - def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": + def create_queue(config, **kwargs: Any) -> 'cmd_queue.Queue': """ Create an empty queue based on options specified in this config @@ -199,19 +247,20 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": cmd_queue.Queue """ import cmd_queue + queuekw = {} if config.backend == 'slurm': queuekw.update(config.slurm_options) elif config.backend == 'tmux': - queuekw.update({ - 'size': config.tmux_workers, - }) + queuekw.update( + { + 'size': config.tmux_workers, + } + ) queuekw.update(kwargs) if 'name' not in queuekw: queuekw['name'] = config.queue_name - queue = cmd_queue.Queue.create( - backend=config.backend, - **queuekw) + queue = cmd_queue.Queue.create(backend=config.backend, **queuekw) if config.virtualenv_cmd: # Experimental feature to automatically activate virtual # environments @@ -219,9 +268,12 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": if virtualenv_cmd == 'auto': import os import shlex + venv_path = os.environ.get('VIRTUAL_ENV', '') if venv_path: - virtualenv_cmd = 'source ' + shlex.quote(str(ub.Path(venv_path) / 'bin/activate')) + virtualenv_cmd = 'source ' + shlex.quote( + str(ub.Path(venv_path) / 'bin/activate') + ) else: virtualenv_cmd = None if virtualenv_cmd: @@ -230,7 +282,7 @@ def create_queue(config, **kwargs: Any) -> "cmd_queue.Queue": def run_queue( config, - queue: "cmd_queue.Queue", + queue: 'cmd_queue.Queue', print_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> None: @@ -241,23 +293,27 @@ def run_queue( queue (cmd_queue.Queue): queue to run / report print_kwargs (None | Dict): """ - import cmd_queue + queue: cmd_queue.Queue print_thresh = 30 if config['print_commands'] == 'auto': if len(queue) < print_thresh: config['print_commands'] = 1 
else: - print(f'More than {print_thresh} jobs, skip queue.print_commands. ' - 'If you want to see them explicitly specify print_commands=1') + print( + f'More than {print_thresh} jobs, skip queue.print_commands. ' + 'If you want to see them explicitly specify print_commands=1' + ) config['print_commands'] = 0 if config['print_queue'] == 'auto': if len(queue) < print_thresh: config['print_queue'] = 1 else: - print(f'More than {print_thresh} jobs, skip queue.print_graph. ' - 'If you want to see them explicitly specify print_queue=1') + print( + f'More than {print_thresh} jobs, skip queue.print_graph. ' + 'If you want to see them explicitly specify print_queue=1' + ) config['print_queue'] = 0 if config.print_commands: @@ -269,6 +325,9 @@ def run_queue( queue.print_graph(vertical_chains=True) if config.run: - queue.run(with_textual=config.with_textual, - other_session_handler=config.other_session_handler, - **kwargs) + queue.run( + with_textual=config.with_textual, + other_session_handler=config.other_session_handler, + monitor=config.monitor, + **kwargs, + ) diff --git a/cmd_queue/main.py b/cmd_queue/main.py index 555eaf4..2254469 100644 --- a/cmd_queue/main.py +++ b/cmd_queue/main.py @@ -1,8 +1,5 @@ #!/usr/bin/env python3 # PYTHON_ARGCOMPLETE_OK -from __future__ import annotations -# mypy: ignore-errors - """ This is the main script for the cmd_queue CLI. The :class:`CmdQueueConfig` defines the available options and its docstring provides a quick tutorial. 
@@ -13,7 +10,8 @@ cmd_queue --help """ -from typing import Any, Callable, TYPE_CHECKING +from __future__ import annotations +from typing import TYPE_CHECKING, Any, Callable import rich import scriptconfig as scfg @@ -66,19 +64,22 @@ def _testcase(): """ + if TYPE_CHECKING: import cmd_queue class CommonConfig(scfg.DataConfig): - qname = scfg.Value(None, position=1, help='name of the CLI queue') - dpath = scfg.Value('auto', help=ub.paragraph( - ''' + dpath = scfg.Value( + 'auto', + help=ub.paragraph( + """ The path the CLI will use to store intermediate files. Defaults to $XDG_CACHE/.cache/cmd_queue/cli - ''' - )) + """ + ), + ) verbose = scfg.Value(1, help='verbosity level') @@ -93,24 +94,39 @@ def main(cls, argv: int = 1, **kwargs: Any) -> None: rich.print('config = ' + ub.urepr(config, nl=1)) cli_queue_name = config['qname'] config.cli_queue_dpath = ub.Path(config['dpath']) - config.cli_queue_fpath = config.cli_queue_dpath / (str(cli_queue_name) + '.cmd_queue.json') + config.cli_queue_fpath = config.cli_queue_dpath / ( + str(cli_queue_name) + '.cmd_queue.json' + ) config.run() class CommonShowRun(CommonConfig): - workers = scfg.Value(1, help='number of concurrent queues for the tmux backend.') - - backend = scfg.Value('tmux', help='the execution backend to use', choices=['tmux', 'slurm', 'serial', 'airflow']) - - gpus = scfg.Value(None, help='a comma separated list of the gpu numbers to spread across. tmux backend only.') + workers = scfg.Value( + 1, help='number of concurrent queues for the tmux backend.' + ) + + backend = scfg.Value( + 'tmux', + help='the execution backend to use', + choices=['tmux', 'slurm', 'serial', 'airflow'], + ) + + gpus = scfg.Value( + None, + help='a comma separated list of the gpu numbers to spread across. 
tmux backend only.', + ) + + def _build_queue(config) -> 'cmd_queue.Queue': + import json - def _build_queue(config) -> "cmd_queue.Queue": import cmd_queue - import json - queue = cmd_queue.Queue.create(size=max(1, config['workers']), - backend=config['backend'], - name=config['qname'], - gpus=config['gpus']) + + queue = cmd_queue.Queue.create( + size=max(1, config['workers']), + backend=config['backend'], + name=config['qname'], + gpus=config['gpus'], + ) # Run a new CLI queue data = json.loads(config.cli_queue_fpath.read_text()) print('data = {}'.format(ub.urepr(data, nl=1))) @@ -128,17 +144,23 @@ def _build_queue(config) -> "cmd_queue.Queue": if len(bash_command) == 1: # hack import shlex + if shlex.quote(bash_command[0]) == bash_command[0]: bash_command = bash_command[0] else: bash_command = shlex.quote(bash_command[0]) else: import shlex - bash_command = ' '.join([shlex.quote(str(p)) for p in bash_command]) + + bash_command = ' '.join( + [shlex.quote(str(p)) for p in bash_command] + ) submitkw = ub.udict(row) & {'name', 'depends'} print('\n\n\n') print(f'submitkw={submitkw}') - print('bash_command = {}'.format(ub.urepr(bash_command, nl=1))) + print( + 'bash_command = {}'.format(ub.urepr(bash_command, nl=1)) + ) print('\n\n\n') queue.submit(bash_command, log=False, **submitkw) except Exception: @@ -256,11 +278,18 @@ class cleanup(CommonConfig): cleanup tmux sessions """ - yes = scfg.Value(False, isflag=True, help='if True say yes to prompts', short_alias=['y']) + yes = scfg.Value( + False, + isflag=True, + help='if True say yes to prompts', + short_alias=['y'], + ) __command__ = 'cleanup' + def run(config) -> None: from cmd_queue.util.util_tmux import tmux + sessions = tmux.list_sessions() print('sessions = {}'.format(ub.urepr(sessions, nl=1))) @@ -271,6 +300,7 @@ def run(config) -> None: sessions_ids.append(session['id']) print('sessions_ids = {}'.format(ub.urepr(sessions_ids, nl=1))) from rich import prompt + if config.yes or prompt.Confirm.ask('Do you want 
to kill these?'): for session_id in sessions_ids: tmux.kill_session(session_id) @@ -279,17 +309,107 @@ class run(CommonShowRun): """ run a queue """ + __command__ = 'run' + def run(config) -> None: - """ - """ + """ """ queue = config._build_queue() queue.run() + class monitor(CommonConfig): + """ + Monitor an already-running queue. + + Locates the queue by name (via the active-queue index that ``run`` + populates), by manifest path, or by the queue's working directory. + Useful for reattaching to a queue whose ``run()`` invocation has + ended (e.g. shell closed) while workers are still active, and as + the entry point used by the tmux monitor backend to host the + status UI in its own session. + """ + + __command__ = 'monitor' + + manifest = scfg.Value( + None, + help=ub.paragraph( + """ + Optional explicit path to the monitor manifest JSON. If + given, this overrides positional name resolution. + """ + ), + ) + + onfail = scfg.Value( + '', + choices=['', 'kill'], + help=ub.paragraph( + """ + What to do if the queue ends with at least one failure. + ``kill`` cancels still-running workers; ``''`` leaves them. + """ + ), + ) + + onexit = scfg.Value( + '', + choices=['', 'capture'], + help=ub.paragraph( + """ + What to do once the queue is fully done. ``capture`` runs the + backend's capture step (e.g. dump tmux pane contents). 
+ """ + ), + ) + + refresh_rate = scfg.Value(0.4, help='monitor refresh rate, seconds') + + with_textual = scfg.Value( + 'auto', help='use textual UI if available (tmux backend only)' + ) + + def run(config) -> None: + from cmd_queue import monitor_manifest as mm + + if config.manifest: + manifest_path = ub.Path(config.manifest).expand().absolute() + if not manifest_path.exists(): + raise FileNotFoundError(manifest_path) + else: + target = config['qname'] + if not target: + raise SystemExit( + 'cmd_queue monitor requires either a queue name ' + '(positional) or --manifest=' + ) + manifest_path = mm.resolve_manifest(target) + if config.verbose: + rich.print( + f'Loading monitor manifest from [bold]{manifest_path}[/bold]' + ) + queue = mm.load_queue_for_monitoring(manifest_path) + kwargs = {} + try: + kwargs['refresh_rate'] = config.refresh_rate + except Exception: + pass + if 'with_textual' in queue.monitor.__code__.co_varnames: + kwargs['with_textual'] = config.with_textual + # monitor() owns post-run cleanup; only forward the kwargs the + # backend's monitor signature actually accepts. + varnames = queue.monitor.__code__.co_varnames + if 'onfail' in varnames: + kwargs['onfail'] = config.onfail + if 'onexit' in varnames: + kwargs['onexit'] = config.onexit + queue.monitor(**kwargs) + class show(CommonShowRun): """ display a queue """ + __command__ = 'show' def run(config) -> None: @@ -301,20 +421,30 @@ class submit(CommonConfig): """ submit a job to a queue """ + __command__ = 'submit' - jobname = scfg.Value(None, help='for submit, this is the name of the new job') + jobname = scfg.Value( + None, help='for submit, this is the name of the new job' + ) depends = scfg.Value(None, help='comma separated jobnames to depend on') - command = scfg.Value(None, type=str, position=2, nargs='*', help=ub.paragraph( - ''' + command = scfg.Value( + None, + type=str, + position=2, + nargs='*', + help=ub.paragraph( + """ Specifies the bash command to queue. 
Care must be taken when specifying this argument. If specifying as a key/value pair argument, it is important to quote and escape the bash command properly. A more convenient way to specify this command is as a positional argument. End all of the options to this CLI with `--` and then specify your full command. - ''')) + """ + ), + ) def run(config) -> None: r""" @@ -363,6 +493,7 @@ def run(config) -> None: ub.cmd('cmd_queue test-queue') """ import json + # Run a new CLI queue data = json.loads(config.cli_queue_fpath.read_text()) row = {'type': 'command', 'command': config['command']} @@ -377,11 +508,16 @@ class new(CommonConfig): """ create a new queue """ + __command__ = 'new' - header = scfg.Value(None, help='a header command to execute in every session (e.g. activating a virtualenv). Only used when action is new') + header = scfg.Value( + None, + help='a header command to execute in every session (e.g. activating a virtualenv). Only used when action is new', + ) def run(config) -> None: import json + # Start a new CLI queue data = [] config = config @@ -396,9 +532,13 @@ class list(CommonConfig): """ display available queues """ + __command__ = 'list' + def run(config) -> None: - print(ub.urepr(list(config.cli_queue_dpath.glob('*.cmd_queue.json')))) + print( + ub.urepr(list(config.cli_queue_dpath.glob('*.cmd_queue.json'))) + ) main: Callable[..., Any] = CmdQueueCLI.main diff --git a/cmd_queue/monitor_app.py b/cmd_queue/monitor_app.py index 46639d9..842fb5b 100644 --- a/cmd_queue/monitor_app.py +++ b/cmd_queue/monitor_app.py @@ -1,35 +1,32 @@ from __future__ import annotations -# mypy: ignore-errors from types import ModuleType from typing import Any, Callable, Optional, Tuple try: from textual import events - from textual.widgets import ScrollView - from textual.widget import Widget from textual.views import DockView - from cmd_queue.util.textual_extensions import ExtHeader - from cmd_queue.util.textual_extensions import InstanceRunnableApp + from textual.widget 
import Widget + from textual.widgets import ScrollView # from rich.panel import Panel # from rich.text import Text from cmd_queue.util import richer as rich from cmd_queue.util import texter as textual + from cmd_queue.util.textual_extensions import ExtHeader, InstanceRunnableApp # import ubelt as ub except ImportError: - rich: ModuleType = None - textual: ModuleType = None - events: ModuleType = None - ScrollView: type = object - Widget: type = object - DockView: type = object - InstanceRunnableApp: type = object - ExtHeader: type = object - + rich: ModuleType = None # type: ignore + textual: ModuleType = None # type: ignore + events: ModuleType = None # type: ignore + ScrollView: type = object # type: ignore + Widget: type = object # type: ignore + DockView: type = object # type: ignore + InstanceRunnableApp: type = object # type: ignore + ExtHeader: type = object # type: ignore -class JobTable(Widget): +class JobTable(Widget): # type: ignore def __init__( self, table_fn: Optional[Callable[[], Tuple[Any, bool, Any]]] = None, @@ -55,7 +52,7 @@ def render(self) -> Any: return table -class CmdQueueMonitorApp(InstanceRunnableApp): +class CmdQueueMonitorApp(InstanceRunnableApp): # type: ignore """ A Textual App to monitor jobs """ @@ -64,11 +61,14 @@ def __init__( self, table_fn: Callable[[], Tuple[Any, bool, Any]], kill_fn: Optional[Callable[[], Any]] = None, + attach_session: Optional[str] = None, **kwargs: Any, ) -> None: self.job_table = JobTable(table_fn) self.kill_fn = kill_fn self.graceful_exit = False + self.attach_session = attach_session + self.attach_requested = False super().__init__(**kwargs) self._title = 'Command Queue' @@ -89,9 +89,11 @@ def demo(CmdQueueMonitorApp) -> CmdQueueMonitorApp: >>> print(f'self.graceful_exit={self.graceful_exit}') """ countdown = 10 + def demo_table_fn(): nonlocal countdown import random + r = random.random() columns = ['name', 'status', 'passed', 'errors', 'total'] table = rich.table.Table() @@ -110,18 +112,29 @@ def 
demo_table_fn(): finished = countdown <= 0 agg_state = None return table, finished, agg_state + return CmdQueueMonitorApp(demo_table_fn) async def on_load(self, event: Any) -> None: - await self.bind("q", "quit", "Quit") + await self.bind('q', 'quit', 'Quit') + if self.attach_session is not None: + await self.bind('a', 'attach_monitor', 'Attach monitor') async def action_quit(self) -> None: await self.shutdown() + async def action_attach_monitor(self) -> None: + # The actual tmux attach has to happen *after* the textual app + # releases the terminal. Flag it and shut down; the caller + # (TMUXMultiQueue._textual_monitor) checks ``attach_requested`` + # and performs the attach + re-launches the app. + self.attach_requested = True + await self.shutdown() + async def on_mount(self, event: Any) -> None: # from textual.layouts.vertical import VerticalLayout - view: DockView = await self.push_view(DockView()) + view: DockView = await self.push_view(DockView()) # type: ignore header = ExtHeader(tall=False) footer = textual.widgets.Footer() # panel = rich.panel.Panel() @@ -133,8 +146,8 @@ async def on_mount(self, event: Any) -> None: # vlayout.add(text) # vlayout.add(table_view) - await view.dock(header, edge="top") - await view.dock(footer, edge="bottom") + await view.dock(header, edge='top') + await view.dock(footer, edge='bottom') await view.dock(table_view) # await view.dock(scrollview2) diff --git a/cmd_queue/monitor_manifest.py b/cmd_queue/monitor_manifest.py new file mode 100644 index 0000000..e3c1440 --- /dev/null +++ b/cmd_queue/monitor_manifest.py @@ -0,0 +1,131 @@ +""" +Persistent metadata describing a queue at run-time so that a monitor process +can reattach to it without holding a live queue object. + +A monitor manifest is a small JSON file written by ``Queue.run()`` (or its +subclass overrides). 
It captures everything the monitor needs to: + + * read worker state files (tmux backend) or job ids (slurm backend) + * cleanup the queue (kill tmux sessions, scancel slurm jobs) + +The :func:`load_queue_for_monitoring` factory rebuilds a queue object +that is sufficient for ``monitor()`` and ``kill()`` to work, without +re-submitting jobs or re-running the workload. + +An "active queue" index in ``~/.cache/cmd_queue/active/.json`` maps +a human queue name to the most recent manifest path so that +``cmd_queue monitor `` can find it. +""" +from __future__ import annotations +import json +from typing import Any, Dict, Optional + +import ubelt as ub + +SCHEMA_VERSION = 1 + + +def manifest_path_for_dpath(dpath: Any) -> ub.Path: + """Canonical location of the manifest file inside a queue's dpath.""" + return ub.Path(dpath) / 'monitor_manifest.json' + + +def _active_index_dpath() -> ub.Path: + return ub.Path.appdir('cmd_queue/active').ensuredir() + + +def active_index_path(name: str) -> ub.Path: + """Path to the active-queue index entry for the given queue name.""" + return _active_index_dpath() / f'{name}.json' + + +def write_manifest(manifest: Dict[str, Any], path: Any) -> ub.Path: + """Atomically write a manifest dict to ``path``.""" + path = ub.Path(path) + path.parent.ensuredir() + payload = dict(manifest) + payload.setdefault('schema_version', SCHEMA_VERSION) + payload['manifest_path'] = str(path) + tmp = path.with_suffix(path.suffix + '.tmp') + tmp.write_text(json.dumps(payload, indent=2, sort_keys=True)) + tmp.replace(path) + return path + + +def read_manifest(path: Any) -> Dict[str, Any]: + return json.loads(ub.Path(path).read_text()) + + +def update_active_index(name: str, manifest_path: Any) -> Optional[ub.Path]: + """Record ``name -> manifest_path`` so ``cmd_queue monitor `` works. + + Returns the active index entry path on success, ``None`` if no name was + provided (e.g. the queue was unnamed). 
+ """ + if not name: + return None + entry = active_index_path(name) + payload = { + 'name': name, + 'manifest_path': str(manifest_path), + 'updated_at': ub.timestamp(), + } + entry.parent.ensuredir() + tmp = entry.with_suffix(entry.suffix + '.tmp') + tmp.write_text(json.dumps(payload, indent=2, sort_keys=True)) + tmp.replace(entry) + return entry + + +def resolve_manifest(name_or_path: str) -> ub.Path: + """Resolve a name or path argument to an absolute manifest path. + + Accepts: + * an absolute or relative path to a manifest file + * a path to a queue dpath (containing ``monitor_manifest.json``) + * a queue name registered in the active-queue index + """ + candidate = ub.Path(name_or_path).expand() + if candidate.is_file(): + return candidate.absolute() + if candidate.is_dir(): + nested = manifest_path_for_dpath(candidate) + if nested.exists(): + return nested.absolute() + entry = active_index_path(name_or_path) + if entry.exists(): + info = json.loads(entry.read_text()) + path = ub.Path(info['manifest_path']) + if path.exists(): + return path.absolute() + raise FileNotFoundError( + f'Active-index entry for {name_or_path!r} points to ' + f'{path}, which no longer exists.' + ) + raise FileNotFoundError( + f'Could not resolve {name_or_path!r} to a queue manifest. ' + f'Tried as path, dpath, and active-index name.' + ) + + +def load_queue_for_monitoring(manifest_path: Any) -> Any: + """Construct a queue object from a manifest, suitable for monitor/kill. + + The returned queue has no submitted jobs. Its ``monitor()`` and + ``kill()`` methods operate on the persisted state files / job ids that + the original ``run()`` invocation produced. 
+ """ + manifest = read_manifest(manifest_path) + backend = manifest['backend'] + if backend == 'tmux': + from cmd_queue import tmux_queue + + return tmux_queue.TMUXMultiQueue._from_manifest(manifest) + elif backend == 'slurm': + from cmd_queue import slurm_queue + + return slurm_queue.SlurmQueue._from_manifest(manifest) + else: + raise NotImplementedError( + f'Monitor reattach is not implemented for backend {backend!r}' + ) diff --git a/cmd_queue/serial_queue.py b/cmd_queue/serial_queue.py index cfcf09a..d47426a 100644 --- a/cmd_queue/serial_queue.py +++ b/cmd_queue/serial_queue.py @@ -1,19 +1,16 @@ -from __future__ import annotations -# mypy: ignore-errors - """ References: https://jmmv.dev/2018/03/shell-readability-strict-mode.html https://stackoverflow.com/questions/13195655/bash-set-x-without-it-being-printed """ +from __future__ import annotations import uuid from typing import Any, Dict, Iterable, List, Optional import ubelt as ub from cmd_queue import base_queue -from cmd_queue.util import util_bash -from cmd_queue.util import util_tags +from cmd_queue.util import util_bash, util_tags class BashJob(base_queue.Job): @@ -86,6 +83,7 @@ class BashJob(base_queue.Job): >>> self = BashJob('echo hi', 'myjob') >>> self.print_commands(with_status=True, with_gaurds=True) """ + def __init__( self, command: str, @@ -103,10 +101,10 @@ def __init__( preamble: Optional[List[str]] = None, **kwargs: Any, ) -> None: - if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.name = name + assert self.name is not None self.pathid = self.name + '_' + ub.hash_data(uuid.uuid4())[0:8] self.kwargs = kwargs # unused kwargs self.cwd = cwd @@ -119,6 +117,7 @@ def __init__( self.info_dpath = info_dpath self.pass_fpath = self.info_dpath / f'passed/{self.pathid}.pass' self.fail_fpath = self.info_dpath / f'failed/{self.pathid}.fail' + self.skip_fpath = self.info_dpath / f'skipped/{self.pathid}.skip' self.stat_fpath = 
self.info_dpath / f'status/{self.pathid}.stat' self.log_fpath = self.info_dpath / f'status/{self.pathid}.logs' self.tags = util_tags.Tags.coerce(tags) @@ -149,7 +148,6 @@ def finalize_text( conditionals: Optional[Dict[str, List[str]]] = None, **kwargs: Any, ) -> str: - # Note: with_gaurds are the +- e and +-x bash behaviors, it is not a # great name. with_status is used to dump extra metadata out. These add # a lot of bash boilerplate, which can make the script more difficult @@ -172,7 +170,10 @@ def finalize_text( f'printf "fail" > {self.fail_fpath}', ], # when dependencies are unmet - 'on_skip': [ ] + 'on_skip': [ + f'mkdir -p {self.skip_fpath.parent}', + f'printf "skip" > {self.skip_fpath}', + ], } # Append custom conditionals @@ -182,7 +183,7 @@ def finalize_text( v2 = conditionals.get(k) if not ub.iterable(v2): v2 = [v2] - v.extend(v2) + v.extend(v2) # type: ignore if with_status: prefix_script.append('# Ensure job status directory') @@ -215,8 +216,9 @@ def finalize_text( json_fmt_parts += [ ('logs', '"%s"', self.log_fpath), ] - dump_pre_status = util_bash.bash_json_dump(json_fmt_parts, - self.stat_fpath) + dump_pre_status = util_bash.bash_json_dump( + json_fmt_parts, self.stat_fpath + ) script.append('# Mark job as running') script.append(dump_pre_status) @@ -235,13 +237,17 @@ def finalize_text( if self.cwd is not None: # If the directory doesn't exist, then the job is marked as failed. 
script.append('# Change to the specified directory') - script.append(f'{{ pushd "{self.cwd}" && CHDIR_OK=1; }} || CHDIR_OK=0') + script.append( + f'{{ pushd "{self.cwd}" && CHDIR_OK=1; }} || CHDIR_OK=0' + ) internal_conditionals.append('"$CHDIR_OK" == 1') if self.preamble: script.append('# Run preamble') preamble_str = ' && '.join(self.preamble) - script.append(f'{{ {preamble_str} && PREAMBLE_OK=1; }} || PREAMBLE_OK=0') + script.append( + f'{{ {preamble_str} && PREAMBLE_OK=1; }} || PREAMBLE_OK=0' + ) internal_conditionals.append('"$PREAMBLE_OK" == 1') if internal_conditionals: @@ -276,7 +282,9 @@ def finalize_text( # Tells bash to stop printing commands, but is clever in that it # captures the last return code and doesnt print this command. # Also set -e so our boilerplate is not allowed to fail - script.append('# Capture job return code, disable command echo, enable exit-on-error') + script.append( + '# Capture job return code, disable command echo, enable exit-on-error' + ) script.append('{ RETURN_CODE=$? ; set +x -e; } 2>/dev/null') # NOTE: ${PIPESTATUS[0]} is an alternative to $? if we want a # specific return code from a job chain @@ -321,18 +329,26 @@ def finalize_text( json_fmt_parts += [ ('logs', '"%s"', self.log_fpath), ] - dump_post_status = util_bash.bash_json_dump(json_fmt_parts, - self.stat_fpath) + dump_post_status = util_bash.bash_json_dump( + json_fmt_parts, self.stat_fpath + ) on_pass_part = indent(_job_conditionals['on_pass']) on_fail_part = indent(_job_conditionals['on_fail']) - conditional_body = '\n'.join([ - 'if [[ "$RETURN_CODE" == "0" ]]; then', - on_pass_part, - 'else', - on_fail_part, - 'fi' - ]) + # RETURN_CODE=126 means dependencies were unmet; on_skip + # already ran in the deps-failed branch above, so we don't + # want to also mark the job as failed here. 
+ conditional_body = '\n'.join( + [ + 'if [[ "$RETURN_CODE" == "0" ]]; then', + on_pass_part, + 'elif [[ "$RETURN_CODE" == "126" ]]; then', + ' : # job was skipped; on_skip already handled', + 'else', + on_fail_part, + 'fi', + ] + ) script.append('# Mark job as stopped') script.append(dump_post_status) script.append(conditional_body) @@ -388,11 +404,13 @@ def print_commands( """ style = base_queue.Queue._coerce_style(self, style, with_rich) - code = self.finalize_text(with_status=with_status, - with_gaurds=with_gaurds, **kwargs) + code = self.finalize_text( + with_status=with_status, with_gaurds=with_gaurds, **kwargs + ) if style == 'rich': - from rich.syntax import Syntax from rich.console import Console + from rich.syntax import Syntax + console = Console() console.print(Syntax(code, 'bash')) elif style == 'colors': @@ -479,7 +497,11 @@ def __init__( """ super().__init__() if rootid is None: - rootid = str(ub.timestamp().split('T')[0]) + '_' + ub.hash_data(uuid.uuid4())[0:8] + rootid = ( + str(ub.timestamp().split('T')[0]) + + '_' + + ub.hash_data(uuid.uuid4())[0:8] + ) self.name = name self.rootid = rootid if dpath is None: @@ -489,7 +511,9 @@ def __init__( self.unused_kwargs = kwargs self.fpath = self.dpath / (self.pathid + '.sh') - self.state_fpath = self.dpath / 'serial_queue_{}.txt'.format(self.pathid) + self.state_fpath = self.dpath / 'serial_queue_{}.txt'.format( + self.pathid + ) self.environ = environ self.header = '#!/bin/bash' # todo: handle different shells @@ -504,7 +528,7 @@ def __init__( @property def pathid(self) -> str: - """ A path-safe identifier for file names """ + """A path-safe identifier for file names""" return '{}_{}'.format(self.name, self.rootid) def __nice__(self) -> str: @@ -525,9 +549,11 @@ def order_jobs(self) -> None: """ # We need to ensure the jobs are in a topologoical order here. 
import networkx as nx + graph = self._dependency_graph() original_order = [j.name for j in self.jobs] from cmd_queue.util import util_networkx + if not util_networkx.is_topological_order(graph, original_order): # If not already topologically sorted, try to make the minimal # reordering to achieve it. @@ -555,6 +581,7 @@ def finalize_text( """ import cmd_queue + self.order_jobs() script = [self.header] script += ['# Written by cmd_queue {}'.format(cmd_queue.__version__)] @@ -565,15 +592,18 @@ def finalize_text( script.append('set -e') if with_status: - script.append(ub.codeblock( - f''' + script.append( + ub.codeblock( + f""" # Init state to keep track of job progress (( "_CMD_QUEUE_NUM_FAILED=0" )) || true (( "_CMD_QUEUE_NUM_PASSED=0" )) || true (( "_CMD_QUEUE_NUM_SKIPPED=0" )) || true _CMD_QUEUE_TOTAL={total} _CMD_QUEUE_STATUS="" - ''')) + """ + ) + ) old_status = None @@ -582,10 +612,13 @@ def _mark_status(status): # be careful with json formatting here if with_status: if old_status != status: - script.append(ub.codeblock( - ''' + script.append( + ub.codeblock( + """ _CMD_QUEUE_STATUS="{}" - ''').format(status)) + """ + ).format(status) + ) old_status = status @@ -599,8 +632,9 @@ def _mark_status(status): ('name', '"%s"', self.name), ('rootid', '"%s"', self.rootid), ] - dump_code = util_bash.bash_json_dump(json_fmt_parts, - self.state_fpath) + dump_code = util_bash.bash_json_dump( + json_fmt_parts, self.state_fpath + ) script.append('# Update queue status') script.append(dump_code) # script.append('cat ' + str(self.state_fpath)) @@ -624,8 +658,9 @@ def _command_exit(): _mark_status('set_environ') if with_gaurds: _command_enter() - script.extend([ - f'export {k}="{v}"' for k, v in self.environ.items()]) + script.extend( + [f'export {k}="{v}"' for k, v in self.environ.items()] + ) if with_gaurds: _command_exit() @@ -658,7 +693,9 @@ def _command_exit(): if job.bookkeeper: if with_locks: - script.append(job.finalize_text(with_status, with_gaurds)) + script.append( + 
job.finalize_text(with_status, with_gaurds) + ) else: if with_status: script.append('') @@ -667,18 +704,25 @@ def _command_exit(): _mark_status('run') - script.append(ub.codeblock( - ''' + script.append( + ub.codeblock( + """ # ### Command {} / {} - {} - ''').format(num + 1, total, job.name)) + """ + ).format(num + 1, total, job.name) + ) conditionals = { 'on_pass': '(( "_CMD_QUEUE_NUM_PASSED=_CMD_QUEUE_NUM_PASSED+1" )) || true', 'on_fail': '(( "_CMD_QUEUE_NUM_FAILED=_CMD_QUEUE_NUM_FAILED+1" )) || true', 'on_skip': '(( "_CMD_QUEUE_NUM_SKIPPED=_CMD_QUEUE_NUM_SKIPPED+1" )) || true', } - script.append(job.finalize_text(with_status, with_gaurds, conditionals)) + script.append( + job.finalize_text( + with_status, with_gaurds, conditionals + ) + ) if with_status: script.append('# ') script.append('#') @@ -700,7 +744,7 @@ def _command_exit(): text = '\n'.join(script) return text - def add_header_command(self, command: str) -> None: + def add_header_command(self, command: str) -> None: # type: ignore ub.schedule_deprecation( modname='cmd_queue', name='add_header_command', @@ -708,7 +752,7 @@ def add_header_command(self, command: str) -> None: migration='use preamble kwarg or add_preamble_command instead', deprecate='now', ) - self.add_preamble_command.append(command) + self.add_preamble_command(command) def add_preamble_command(self, command: Any) -> None: if isinstance(command, list): @@ -783,18 +827,40 @@ def run( # TODO: can implement a monitor here for non-blocking mode detach = not block if mode == 'bash': - ub.cmd(f'bash {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'bash {self.fpath}', + verbose=verbose, + check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) elif mode == 'source': - ub.cmd(f'source {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'source {self.fpath}', + verbose=verbose, + 
check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) else: - ub.cmd(f'{mode} {self.fpath}', verbose=verbose, check=True, - capture=capture, shell=shell, system=system, detach=detach) + ub.cmd( + f'{mode} {self.fpath}', + verbose=verbose, + check=True, + capture=capture, + shell=shell, + system=system, + detach=detach, + ) # raise KeyError def job_details(self) -> None: import json + for job in self.jobs: print('+--------') print(f'job={job}') @@ -807,6 +873,7 @@ def job_details(self) -> None: def read_state(self) -> Dict[str, Any]: import json import time + max_attempts = 100 num_attempts = 0 while True: @@ -859,6 +926,7 @@ def indent(text: Any, prefix: str = ' ') -> str: def _check_bash_text_for_syntax_errors(bash_text: str) -> None: import tempfile + tmpdir = tempfile.TemporaryDirectory() with tmpdir: dpath = ub.Path(tmpdir.name) diff --git a/cmd_queue/slurm_queue.py b/cmd_queue/slurm_queue.py index 61a40d7..7c7c800 100644 --- a/cmd_queue/slurm_queue.py +++ b/cmd_queue/slurm_queue.py @@ -1,6 +1,3 @@ -from __future__ import annotations -# mypy: ignore-errors - r""" Work in progress. 
The idea is to provide a TMUX queue and a SLURM queue that provide a common high level API, even though functionality might diverge, the @@ -40,6 +37,7 @@ >>> else: >>> print('output does not exist') """ +from __future__ import annotations from typing import Any, Dict, Iterable, List, Optional, Union import ubelt as ub @@ -47,7 +45,6 @@ from cmd_queue import base_queue # NOQA from cmd_queue.util import util_tags - try: from functools import cache # Python 3.9+ only except ImportError: @@ -57,6 +54,7 @@ @cache def _unit_registery() -> Any: import sys + if sys.version_info[0:2] == (3, 9): # backwards compatibility support for numpy 2.0 and pint on cp39 try: @@ -67,6 +65,7 @@ def _unit_registery() -> Any: if not np.__version__.startswith('1.'): np.cumproduct = np.cumprod import pint + reg = pint.UnitRegistry() return reg @@ -234,6 +233,7 @@ class SlurmJob(base_queue.Job): >>> command = self._build_command() >>> print(command) """ + def __init__( self, command: str, @@ -246,15 +246,16 @@ def __init__( begin: Optional[Any] = None, shell: Optional[Any] = None, tags: Optional[Any] = None, - preamble: Optional[List[str]] = None, + preamble: List[str] | str | None = None, **kwargs: Any, ) -> None: super().__init__() if name is None: import uuid + name = 'job-' + str(uuid.uuid4()) if depends is not None and not ub.iterable(depends): - depends = [depends] + depends = [depends] # type: ignore self.unused_kwargs = kwargs self.command = command self.name = name @@ -284,8 +285,10 @@ def _build_command( jobname_to_varname: Optional[Dict[str, str]] = None, global_preamble: Optional[List[str]] = None, ) -> str: - args = self._build_sbatch_args(jobname_to_varname=jobname_to_varname, - global_preamble=global_preamble) + args = self._build_sbatch_args( + jobname_to_varname=jobname_to_varname, + global_preamble=global_preamble, + ) return ' \\\n '.join(args) def _build_sbatch_args( @@ -303,15 +306,19 @@ def _build_sbatch_args( sbatch_args.append(f'--mem={mem}') if self.gpus and 'gres' 
not in self._sbatch_kvargs: ub.schedule_deprecation( - 'cmd_queue', name='gres', type='argument', + 'cmd_queue', + name='gres', + type='argument', migration=ub.paragraph( - ''' + """ the handling of gres here is broken and will be changed in the future. For now specify gres explicitly in slurm_options or the kwargs for the queue. - '''), - deprecate='now' + """ + ), + deprecate='now', ) + # NOTE: the handling of gres here is broken and will be changed in # the future. For now specify gres explicitly in slurm_options def _coerce_gres(gpus): @@ -324,6 +331,7 @@ def _coerce_gres(gpus): else: raise TypeError(type(self.gpus)) return gres + gres = _coerce_gres(self.gpus) sbatch_args.append(f'--gres="{gres}"') if self.output_fpath: @@ -344,13 +352,18 @@ def _coerce_gres(gpus): type_to_dependencies = { 'afterok': [], } - depends = self.depends if ub.iterable(self.depends) else [self.depends] + depends = ( + self.depends if ub.iterable(self.depends) else [self.depends] + ) for item in depends: if isinstance(item, SlurmJob): jobid = item.jobid if jobid is None and item.name: - if jobname_to_varname and item.name in jobname_to_varname: + if ( + jobname_to_varname + and item.name in jobname_to_varname + ): jobid = '${%s}' % jobname_to_varname[item.name] else: jobid = f"$(squeue --noheader --format %i --name '{item.name}')" @@ -383,6 +396,7 @@ def _coerce_gres(gpus): sbatch_args.append(f'"--begin={self.begin}"') import shlex + _preamble = [] if global_preamble: _preamble.extend(global_preamble) @@ -450,6 +464,7 @@ class SlurmQueue(base_queue.Queue): >>> job5 = self.submit('echo "$FOO"') >>> self.print_commands() """ + def __init__( self, name: Optional[str] = None, @@ -458,14 +473,18 @@ def __init__( **kwargs: Any, ) -> None: super().__init__() - import uuid import time + import uuid + self.jobs = [] if name is None: name = 'SQ' + self.name = name stamp = time.strftime('%Y%m%dT%H%M%S') self.unused_kwargs = kwargs - self.queue_id = name + '-' + stamp + '-' + 
ub.hash_data(uuid.uuid4())[0:8] + self.queue_id = ( + name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8] + ) self.dpath = ub.Path.appdir('cmd_queue/slurm') / self.queue_id if 0: # hack for submission on different systems, probably dont want to @@ -496,6 +515,7 @@ def _slurm_checks() -> None: status['has_squeue'] = bool(info['squeue_fpath']) status['slurmd_running'] = False import psutil + for p in psutil.process_iter(): if p.name() == 'slurmd': status['slurmd_running'] = True @@ -506,19 +526,23 @@ def _slurm_checks() -> None: 'create_time': p.create_time(), } break - status['squeue_working'] = (ub.cmd('squeue')['ret'] == 0) + status['squeue_working'] = ub.cmd('squeue')['ret'] == 0 sinfo = ub.cmd('sinfo --json') status['sinfo_working'] = False if sinfo['ret'] == 0: import json + status['sinfo_working'] = True - status['sinfo_version_str'] = ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + status['sinfo_version_str'] = ( + ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + ) sinfo_out = json.loads(sinfo['out']) nodes = sinfo_out['nodes'] node_states = [node['state'] for node in nodes] has_working_nodes = not all( - 'down' in str(state).lower() for state in node_states) + 'down' in str(state).lower() for state in node_states + ) status['has_working_nodes'] = has_working_nodes @staticmethod @@ -528,20 +552,26 @@ def is_available() -> bool: """ if ub.find_exe('squeue'): import psutil - slurmd_running = any(p.name() == 'slurmd' for p in psutil.process_iter()) + + slurmd_running = any( + p.name() == 'slurmd' for p in psutil.process_iter() + ) if slurmd_running: - squeue_working = (ub.cmd('squeue')['ret'] == 0) + squeue_working = ub.cmd('squeue')['ret'] == 0 if squeue_working: # Check if nodes are available or down # note: the --json command is not available in # slurm-wlm 19.05.5, but it is in slurm-wlm 21.08.5 - sinfo_version_str = ub.cmd('sinfo --version').stdout.strip().split(' ')[1] + sinfo_version_str = ( + ub.cmd('sinfo 
--version').stdout.strip().split(' ')[1] + ) sinfo_major_version = int(sinfo_version_str.split('.')[0]) if sinfo_major_version < 21: # Dont check in this case return True else: import json + # sinfo --json changed between v22 and v23 # https://github.com/SchedMD/slurm/blob/slurm-23.02/RELEASE_NOTES#L230 if sinfo_major_version >= 21: @@ -563,10 +593,13 @@ def is_available() -> bool: node_states = [node['state'] for node in nodes] if sinfo_major_version > 21: has_working_nodes = not all( - 'down' in str(state).lower() for state in node_states) + 'down' in str(state).lower() + for state in node_states + ) else: has_working_nodes = not all( - 'DOWN' in state for state in node_states) + 'DOWN' in state for state in node_states + ) if has_working_nodes: return True @@ -619,7 +652,8 @@ def submit( depends = [depends] depends = [ self.named_jobs[dep] if isinstance(dep, str) else dep - for dep in depends] + for dep in depends + ] _kwargs = self._sbatch_kvargs | kwargs job = SlurmJob(command, depends=depends, preamble=preamble, **_kwargs) @@ -636,6 +670,7 @@ def order_jobs(self) -> List[SlurmJob]: List[SlurmJob]: ordered jobs """ import networkx as nx + graph = self._dependency_graph() if 0: print(nx.forest_str(nx.minimum_spanning_arborescence(graph))) @@ -645,7 +680,9 @@ def order_jobs(self) -> List[SlurmJob]: new_order.append(job) return new_order - def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> str: + def finalize_text( + self, exclude_tags: Optional[Any] = None, **kwargs: Any + ) -> str: """ Serialize the state of the queue into a bash script. 
@@ -668,7 +705,8 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st # args = job._build_sbatch_args(jobname_to_varname) # command = ' '.join(args) command = job._build_command( - jobname_to_varname, global_preamble=global_preamble) + jobname_to_varname, global_preamble=global_preamble + ) if 1: varname = 'JOB_{:03d}'.format(len(jobname_to_varname)) command = f'{varname}=$({command} --parsable)' @@ -681,6 +719,7 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st # Build a command to dump the job-ids for this queue to disk to # allow us to track them in the monitor. from cmd_queue.util import util_bash + json_fmt_parts = [ (job_varname, '%s', '$' + job_varname) for job_varname in self.jobname_to_varname.values() @@ -692,18 +731,173 @@ def finalize_text(self, exclude_tags: Optional[Any] = None, **kwargs: Any) -> st text = '\n'.join(commands) return text - def run(self, block: bool = True, system: bool = False, **kw: Any) -> Optional[Any]: + def run( + self, + block: bool = True, + system: bool = False, + onfail: str = '', + onexit: str = '', + monitor: str = 'hybrid', + **kw: Any, + ) -> Optional[Any]: + """ + Execute the queue. + + Args: + monitor (str): where the live status UI runs while + ``block=True``. + + * ``'hybrid'`` (default): inline UI in the current + shell *and* a detached ``cmd_queue monitor`` tmux + session you can press ``[a]`` to attach to. The + side session is killed when the inline monitor + exits. Falls back to ``'inline'`` when tmux is + unavailable. + * ``'inline'``: renders only in the current shell. + * ``'tmux'``: spawns ``cmd_queue monitor`` only in a + detached tmux session so the UI survives the + calling shell closing — useful for slurm jobs + whose workers run on the cluster long after the + submit shell might be gone. + * ``'none'``: skips the UI but still blocks when + ``block=True``. 
+ """ if not self.is_available(): raise Exception('slurm backend is not available') self.log_dpath.ensuredir() self.write() + manifest_path = self._write_monitor_manifest() ub.cmd(f'bash {self.fpath}', verbose=3, check=True, system=system) - if block: - return self.monitor() + if not block: + return None + if monitor == 'inline': + return self.monitor(onfail=onfail, onexit=onexit) + if monitor == 'hybrid': + from cmd_queue.util.util_tmux import tmux as _tmux + + side_session = None + if ub.find_exe('tmux'): + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + side_session = f'cmdq-monitor-{self.queue_id}' + from rich import print as rich_print + + rich_print( + f'[dim]Spawned attachable monitor in tmux session[/dim] ' + f'{side_session} [dim](press [a] to attach)[/dim]' + ) + _tmux.spawn_monitor_session( + session_name=side_session, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + else: + import warnings - def monitor(self, refresh_rate: float = 0.4) -> Optional[Any]: + warnings.warn( + "monitor='hybrid' requested but tmux not found; " + 'falling back to inline-only monitor.' + ) + try: + return self.monitor( + onfail=onfail, + onexit=onexit, + side_session=side_session, + ) + finally: + if side_session and _tmux.has_session(side_session): + _tmux.kill_session(side_session, verbose=0) + if monitor == 'none': + from rich import print as rich_print + + rich_print( + '[bold]Queue running detached.[/bold] ' + f'Reattach with: cmd_queue monitor --manifest={manifest_path}' + ) + return None + if monitor == 'tmux': + if not ub.find_exe('tmux'): + import warnings + + warnings.warn( + "monitor='tmux' requested but tmux not found; " + 'falling back to inline monitor.' 
+ ) + return self.monitor(onfail=onfail, onexit=onexit) + from cmd_queue.util.util_tmux import tmux as _tmux + + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + session_name = f'cmdq-monitor-{self.queue_id}' + from rich import print as rich_print + + rich_print( + f'[bold]Launching monitor in tmux session[/bold] {session_name}' + ) + _tmux.spawn_monitor_session( + session_name=session_name, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + job_names = {job.name for job in self.jobs} + + def _is_finished() -> bool: + if not job_names: + return True + info = ub.cmd('squeue --format="%j"') + still_queued = { + line.strip() + for line in info['out'].splitlines() + if line.strip() in job_names + } + return not still_queued + + _tmux.block_with_attach_prompt( + session_name=session_name, + is_finished_fn=_is_finished, + refresh_rate=5.0, + label=f'queue {self.name or self.queue_id}', + ) + return None + raise ValueError( + "monitor must be one of 'hybrid', 'inline', 'tmux', 'none'; " + f'got {monitor!r}' + ) + + def monitor( + self, + refresh_rate: float = 0.4, + # TODO: use or document as unused or make the signature sane across + # clsses + with_textual: str | bool = 'auto', + onfail: str = '', + onexit: str = '', + side_session: Optional[str] = None, + ) -> Optional[Any]: """ - Monitor progress until the jobs are done + Monitor progress until the jobs are done. + + Owns post-run cleanup so that whether the monitor runs inline or + in a separate process (tmux monitor backend, ``cmd_queue + monitor`` CLI), the same finalization happens. + + Args: + onfail (str): if ``'kill'``, scancel the queue's jobs after + the monitor exits when there are failures. Slurm has no + tmux-style sessions to clean up on success, so this only + fires on failure. + onexit (str): currently unused for slurm (kept for API + parity with the tmux backend). 
CommandLine: xdoctest -m cmd_queue.slurm_queue SlurmQueue.monitor --dev --run @@ -725,21 +919,28 @@ def monitor(self, refresh_rate: float = 0.4) -> Optional[Any]: >>> queue.run() """ - import time - from rich.live import Live - from rich.table import Table import io + import pandas as pd + from rich.table import Table + + from cmd_queue.tmux_queue import ( + _attach_hint_renderable, + _run_live_with_attach, + ) + jobid_history = set() num_at_start = None job_status_table = None if self.jobid_fpath is not None: - class UnableToMonitor(Exception): - ... + + class UnableToMonitor(Exception): ... + try: import json + if not self.jobid_fpath.exists(): raise UnableToMonitor jobid_lut = json.loads(self.jobid_fpath.read_text()) @@ -757,6 +958,8 @@ class UnableToMonitor(Exception): def update_jobid_status(): import rich + + assert job_status_table is not None for row in job_status_table: if row['needs_update']: job_id = row['job_id'] @@ -768,7 +971,7 @@ def update_jobid_status(): if info['JobState'].startswith('FAILED'): row['status'] = 'failed' rich.print(f'[red] Failed job: {info["JobName"]}') - if info["StdErr"] == info["StdOut"]: + if info['StdErr'] == info['StdOut']: rich.print(f'[red] * Logs: {info["StdErr"]}') else: rich.print(f'[red] StdErr: {info["StdErr"]}') @@ -818,7 +1021,9 @@ def update_status_table(): # kills jobs too fast and not when they are in a dependency state not a # a never satisfied state. Killing these jobs here seems to fix # it. 
- broken_jobs = df[df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)'] + broken_jobs = df[ + df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)' + ] if len(broken_jobs): for name in broken_jobs['NAME']: ub.cmd(f'scancel --name="{name}"') @@ -828,7 +1033,9 @@ def update_status_table(): if job_status_table is not None: update_jobid_status() - state = ub.dict_hist([row['status'] for row in job_status_table]) + state = ub.dict_hist( + [row['status'] for row in job_status_table] + ) state.setdefault('passed', 0) state.setdefault('failed', 0) state.setdefault('skipped', 0) @@ -838,34 +1045,52 @@ def update_status_table(): state['total'] = len(job_status_table) state['other'] = state['total'] - ( - state['passed'] + state['failed'] + state['skipped'] + - state['running'] + state['pending'] + state['passed'] + + state['failed'] + + state['skipped'] + + state['running'] + + state['pending'] ) pass_color = '' fail_color = '' skip_color = '' - finished = (state['pending'] + state['unknown'] + state['running'] == 0) - if (state['failed'] > 0): + finished = ( + state['pending'] + state['unknown'] + state['running'] == 0 + ) + if state['failed'] > 0: fail_color = '[red]' - if (state['skipped'] > 0): + if state['skipped'] > 0: skip_color = '[yellow]' if finished: pass_color = '[green]' - header = ['passed', 'failed', 'skipped', 'running', 'pending', 'other', 'total'] + header = [ + 'passed', + 'failed', + 'skipped', + 'running', + 'pending', + 'other', + 'total', + ] row_values = [ - f"{pass_color}{state['passed']}", - f"{fail_color}{state['failed']}", - f"{skip_color}{state['skipped']}", - f"{state['running']}", - f"{state['pending']}", - f"{state['other']}", - f"{state['total']}", + f'{pass_color}{state["passed"]}', + f'{fail_color}{state["failed"]}', + f'{skip_color}{state["skipped"]}', + f'{state["running"]}', + f'{state["pending"]}', + f'{state["other"]}', + f'{state["total"]}', ] else: # TODO: determine if slurm has accounting on, and if we can # figure out how many 
jobs errored / passed - header = ['num_running', 'num_in_queue', 'total_monitored', 'num_at_start'] + header = [ + 'num_running', + 'num_in_queue', + 'total_monitored', + 'num_at_start', + ] row_values = [ f'{num_running}', f'{num_in_queue}', @@ -875,28 +1100,62 @@ def update_status_table(): # row_values.append(str(state.get('FAIL', 0))) # row_values.append(str(state.get('SKIPPED', 0))) # row_values.append(str(state.get('PENDING', 0))) - finished = (num_in_queue == 0) + finished = num_in_queue == 0 - table = Table(*header, - title='slurm-monitor') + table = Table(*header, title='slurm-monitor') table.add_row(*row_values) return table, finished + agg_state: Dict[str, Any] = {} + + def _update_agg_state() -> None: + if job_status_table is None: + return + counts = ub.dict_hist([row['status'] for row in job_status_table]) + for key in ('passed', 'failed', 'skipped'): + agg_state[key] = counts.get(key, 0) + agg_state['total'] = len(job_status_table) + try: - table, finished = update_status_table() + import sys + + from rich.console import Group + + def _build_renderable() -> Any: + table, finished = update_status_table() + hint = ( + _attach_hint_renderable(side_session) + if side_session + else None + ) + renderable = Group(table, hint) if hint is not None else table + # The slurm Live loop tracks completion via a separate + # variable than tmux; agg_state is updated post-loop. 
+ return renderable, finished, None + refresh_rate = 0.4 - with Live(table, refresh_per_second=4) as live: - while not finished: - time.sleep(refresh_rate) - table, finished = update_status_table() - live.update(table) + use_keys = side_session is not None and sys.stdin.isatty() + _run_live_with_attach( + build_renderable=_build_renderable, + refresh_rate=refresh_rate, + side_session=side_session if use_keys else None, + ) + _update_agg_state() except KeyboardInterrupt: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() + return agg_state + + # Slurm has no idle sessions to clean up on success, so onfail='kill' + # only fires when there are observed failures. + if onfail == 'kill' and agg_state.get('failed'): + self.kill() + return agg_state def kill(self) -> None: cancel_commands = [] @@ -910,6 +1169,58 @@ def read_state(self) -> Dict[str, Any]: # this return {} + def _build_monitor_manifest(self) -> Dict[str, Any]: + """Snapshot enough state for an out-of-process monitor to reattach.""" + return { + 'backend': 'slurm', + 'name': self.name or self.queue_id, + 'queue_id': self.queue_id, + 'dpath': str(self.dpath), + 'fpath': str(self.fpath), + 'jobid_fpath': str(self.jobid_fpath) if self.jobid_fpath else None, + 'job_names': [job.name for job in self.jobs], + } + + def _write_monitor_manifest(self) -> Any: + """Persist the monitor manifest to ``/monitor_manifest.json``.""" + from cmd_queue import monitor_manifest as mm + + path = mm.manifest_path_for_dpath(self.dpath) + manifest = self._build_monitor_manifest() + mm.write_manifest(manifest, path) + # Register under both queue_id (always unique) and the user-supplied + # name (when distinct) so `cmd_queue monitor ` finds it. 
+ mm.update_active_index(self.queue_id, path) + if self.name and self.name != self.queue_id: + mm.update_active_index(self.name, path) + return path + + @classmethod + def _from_manifest(cls, manifest: Dict[str, Any]) -> 'SlurmQueue': + """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" + self = cls.__new__(cls) + base_queue.Queue.__init__(self) + self.queue_id = manifest['queue_id'] + self.name = manifest.get('name', self.queue_id) + self.dpath = ub.Path(manifest['dpath']) + self.fpath = ub.Path(manifest['fpath']) + self.log_dpath = self.dpath / 'logs' + self.shell = None + self.preamble = [] + self.all_depends = None + self._sbatch_kvargs = ub.udict() + self._sbatch_flags = ub.udict() + self._include_monitor_metadata = False + jobid_fpath = manifest.get('jobid_fpath') + self.jobid_fpath = ub.Path(jobid_fpath) if jobid_fpath else None + self.unused_kwargs = {} + # The reconstructed jobs only need a name for kill() (scancel --name). + self.jobs = [ + SlurmJob(command='', name=name) + for name in manifest.get('job_names', []) + ] + return self + def print_commands(self, *args: Any, **kwargs: Any) -> None: r""" Print info about the commands, optionally with rich @@ -981,11 +1292,19 @@ def parse_scontrol_output(output: str) -> dict: parse_scontrol_output(output) """ import re + # These keys should be the last key on a line. They are allowed to contain # space and equal characters. 
special_keys = [ - 'JobName', 'WorkDir', 'StdErr', 'StdIn', 'StdOut', 'Command', - 'NodeList', 'BatchHost', 'Partition' + 'JobName', + 'WorkDir', + 'StdErr', + 'StdIn', + 'StdOut', + 'Command', + 'NodeList', + 'BatchHost', + 'Partition', ] patterns = '(' + '|'.join(f' {re.escape(k)}=' for k in special_keys) + ')' pat = re.compile(patterns) @@ -1001,7 +1320,7 @@ def parse_scontrol_output(output: str) -> dict: # Special case: Key is a special key with a space startpos = match.start() leading_part = line[:startpos] - special_part = line[startpos + 1:] + special_part = line[startpos + 1 :] key, value = special_part.split('=', 1) parsed_data[key] = value.strip() line = leading_part diff --git a/cmd_queue/slurmify.py b/cmd_queue/slurmify.py index 7947d3d..7fb4171 100644 --- a/cmd_queue/slurmify.py +++ b/cmd_queue/slurmify.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 r""" Helper script to wrap a command with sbatch, but using a more srun like syntax. @@ -15,7 +16,6 @@ -- \ python -c 'import sys; print("hello world"); sys.exit(0)' """ -#!/usr/bin/env python3 import scriptconfig as scfg import ubelt as ub @@ -23,21 +23,35 @@ class SlurmifyCLI(scfg.DataConfig): __command__ = 'slurmify' - jobname = scfg.Value(None, help='for submit, this is the name of the new job') + jobname = scfg.Value( + None, help='for submit, this is the name of the new job' + ) depends = scfg.Value(None, help='comma separated jobnames to depend on') - command = scfg.Value(None, type=str, position=1, nargs='*', help=ub.paragraph( - ''' + command = scfg.Value( + None, + type=str, + position=1, + nargs='*', + help=ub.paragraph( + """ Specifies the bash command to queue. Care must be taken when specifying this argument. If specifying as a key/value pair argument, it is important to quote and escape the bash command properly. A more convenient way to specify this command is as a positional argument. End all of the options to this CLI with `--` and then specify your full command. 
- ''')) + """ + ), + ) - gpus = scfg.Value(None, help='a comma separated list of the gpu numbers to spread across. tmux backend only.') - workers = scfg.Value(1, help='number of concurrent queues for the tmux backend.') + gpus = scfg.Value( + None, + help='a comma separated list of the gpu numbers to spread across. tmux backend only.', + ) + workers = scfg.Value( + 1, help='number of concurrent queues for the tmux backend.' + ) mem = scfg.Value(None, help='') partition = scfg.Value(1, help='slurm partition') @@ -59,6 +73,7 @@ def main(cls, argv=1, **kwargs): """ import rich from rich.markup import escape + config = cls.cli(argv=argv, data=kwargs, strict=True) rich.print('config = ' + escape(ub.urepr(config, nl=1))) @@ -71,6 +86,7 @@ def main(cls, argv=1, **kwargs): row['depends'] = config.depends import cmd_queue + queue = cmd_queue.Queue.create( size=max(1, config['workers']), backend='slurm', @@ -87,13 +103,17 @@ def main(cls, argv=1, **kwargs): if len(bash_command) == 1: # hack import shlex + if shlex.quote(bash_command[0]) == bash_command[0]: bash_command = bash_command[0] else: bash_command = shlex.quote(bash_command[0]) else: import shlex - bash_command = ' '.join([shlex.quote(str(p)) for p in bash_command]) + + bash_command = ' '.join( + [shlex.quote(str(p)) for p in bash_command] + ) submitkw = ub.udict(row) & {'name', 'depends'} queue.submit(bash_command, log=False, **submitkw) except Exception: @@ -104,6 +124,7 @@ def main(cls, argv=1, **kwargs): # config.cli_queue_fpath.write_text(json.dumps(row)) # 'sbatch --job-name="test_job1" --output="$HOME/.cache/slurm/logs/job-%j-%x.out" --wrap="" + __cli__ = SlurmifyCLI if __name__ == '__main__': diff --git a/cmd_queue/tmux_queue.py b/cmd_queue/tmux_queue.py index dc83d90..f5d8f38 100644 --- a/cmd_queue/tmux_queue.py +++ b/cmd_queue/tmux_queue.py @@ -1,6 +1,3 @@ -from __future__ import annotations -# mypy: ignore-errors - """ A very simple queue based on tmux and bash @@ -51,14 +48,14 @@ >>> queue.run() """ +from 
__future__ import annotations import uuid from typing import Any, Dict, Iterable, List, Optional import ubelt as ub -# import itertools as it -from cmd_queue import base_queue -from cmd_queue import serial_queue +# import itertools as it +from cmd_queue import base_queue, serial_queue from cmd_queue.util.util_tmux import tmux @@ -199,6 +196,7 @@ class TMUXMultiQueue(base_queue.Queue): >>> if self.is_available(): >>> self.run(with_textual=False, check_other_sessions=0) """ + def __init__( self, size: int = 1, @@ -213,7 +211,11 @@ def __init__( super().__init__() if rootid is None: - rootid = str(ub.timestamp().split('T')[0]) + '_' + ub.hash_data(uuid.uuid4())[0:8] + rootid = ( + str(ub.timestamp().split('T')[0]) + + '_' + + ub.hash_data(uuid.uuid4())[0:8] + ) if name is None: name = 'unnamed' self.name = name @@ -229,7 +231,9 @@ def __init__( # Note: size can be changed as long as it happens before the queue is # written and run. if size <= 0: - raise ValueError(f'tmux queue size must be positive got size={size}') + raise ValueError( + f'tmux queue size must be positive got size={size}' + ) self.size = size self.environ = environ self.fpath = self.dpath / f'run_queues_{self.name}.sh' @@ -261,22 +265,28 @@ def is_available(cls) -> bool: def _new_workers(self, start: int = 0) -> List[serial_queue.SerialQueue]: import itertools as it + per_worker_environs = [self.environ] * self.size if self.gpus: # TODO: more sophisticated GPU policy? 
per_worker_environs = [ - ub.dict_union(e, { - 'CUDA_VISIBLE_DEVICES': str(cvd), - }) + ub.dict_union( + e, + { + 'CUDA_VISIBLE_DEVICES': str(cvd), + }, + ) for cvd, e in zip(it.cycle(self.gpus), per_worker_environs) ] workers = [ serial_queue.SerialQueue( - name='{}{}_{:03d}'.format(self._tmux_session_prefix, self.name, worker_idx), + name='{}{}_{:03d}'.format( + self._tmux_session_prefix, self.name, worker_idx + ), rootid=self.rootid, dpath=self.dpath, - environ=e + environ=e, ) for worker_idx, e in enumerate(per_worker_environs, start=start) ] @@ -285,7 +295,9 @@ def _new_workers(self, start: int = 0) -> List[serial_queue.SerialQueue]: def __nice__(self) -> str: return ub.urepr(self.jobs) - def _semaphore_wait_command(self, flag_fpaths: Iterable[str], msg: str) -> str: + def _semaphore_wait_command( + self, flag_fpaths: Iterable[str], msg: str + ) -> str: r""" TODO: use flock? or inotify? @@ -326,15 +338,16 @@ def _semaphore_wait_command(self, flag_fpaths: Iterable[str], msg: str) -> str: sleep 1; done printf "finished {msg} " - ''') + ''' + ) return command def _semaphore_signal_command(self, flag_fpath): return ub.codeblock( - f''' + f""" # Signal this worker is complete mkdir -p {flag_fpath.parent} && touch {flag_fpath} - ''' + """ ) def order_jobs(self) -> None: @@ -425,6 +438,7 @@ def order_jobs(self) -> None: >>> self.print_commands() """ import networkx as nx + graph = self._dependency_graph() # Get rid of implicit dependencies @@ -442,7 +456,7 @@ def order_jobs(self) -> None: print(nx.is_directed_acyclic_graph(graph)) simple_cycles = list(nx.cycles.simple_cycles(graph)) print('simple_cycles = {}'.format(ub.urepr(simple_cycles, nl=1))) - nx.write_network_text(graph, print, end="") + nx.write_network_text(graph, print, end='') raise in_cut_nodes = set() @@ -465,7 +479,9 @@ def order_jobs(self) -> None: cut_graph.remove_edges_from(cut_edges) # Get all the node groups disconnected by the cuts - condensed = nx.condensation(reduced_graph, 
nx.weakly_connected_components(cut_graph)) + condensed = nx.condensation( + reduced_graph, nx.weakly_connected_components(cut_graph) + ) # TODO: can we use nx.topological_generations for a more elegant # solution here? @@ -476,17 +492,24 @@ def order_jobs(self) -> None: condensed_order = list(nx.topological_sort(condensed)) for c_node in condensed_order: members = set(condensed.nodes[c_node]['members']) - ancestors = set(ub.flatten([nx.ancestors(reduced_graph, m) for m in members])) + ancestors = set( + ub.flatten([nx.ancestors(reduced_graph, m) for m in members]) + ) cut_in_ancestors = ancestors & in_cut_nodes cut_out_ancestors = ancestors & out_cut_nodes cut_in_members = members & in_cut_nodes - rank = len(cut_in_members) + len(cut_out_ancestors) + len(cut_in_ancestors) + rank = ( + len(cut_in_members) + + len(cut_out_ancestors) + + len(cut_in_ancestors) + ) for m in members: rankings[rank].update(members) if 0: - from graphid.util import util_graphviz import kwplot + from graphid.util import util_graphviz + kwplot.autompl() util_graphviz.show_nx(graph, fnum=1) util_graphviz.show_nx(reduced_graph, fnum=3) @@ -507,10 +530,15 @@ def order_jobs(self) -> None: # Ranked bins # Solve a bin packing problem to partition these into self.size groups from cmd_queue.util.util_algo import balanced_number_partitioning + # Weighting by job heaviness would help here. 
group_weights = list(map(len, parallel_groups)) - groupxs = balanced_number_partitioning(group_weights, num_parts=self.size) - rank_groups = [list(ub.take(parallel_groups, gxs)) for gxs in groupxs] + groupxs = balanced_number_partitioning( + group_weights, num_parts=self.size + ) + rank_groups = [ + list(ub.take(parallel_groups, gxs)) for gxs in groupxs + ] rank_groups = [g for g in rank_groups if len(g)] # Reorder each group to better agree with submission order @@ -520,8 +548,12 @@ def order_jobs(self) -> None: for nodes in group: nodes_index = min(graph.nodes[n]['index'] for n in nodes) priorities.append(nodes_index) - final_queue_order = list(ub.flatten(ub.take(group, ub.argsort(priorities)))) - final_queue_jobs = [graph.nodes[n]['job'] for n in final_queue_order] + final_queue_order = list( + ub.flatten(ub.take(group, ub.argsort(priorities))) + ) + final_queue_jobs = [ + graph.nodes[n]['job'] for n in final_queue_order + ] rank_jobs.append(final_queue_jobs) ranked_job_groups.append(rank_jobs) @@ -534,7 +566,7 @@ def order_jobs(self) -> None: ranked_job_groups = [[serial_groups]] queue_workers = [] - flag_dpath = (self.dpath / 'semaphores') + flag_dpath = self.dpath / 'semaphores' prev_rank_flag_fpaths = None for rank, rank_jobs in enumerate(ranked_job_groups): # Hack, abuse init workers each time to construct workers @@ -544,7 +576,10 @@ def order_jobs(self) -> None: # Add a dummy job to wait for dependencies of this linear queue if prev_rank_flag_fpaths: - command = self._semaphore_wait_command(prev_rank_flag_fpaths, msg=f"wait for previous rank {rank - 1}") + command = self._semaphore_wait_command( + prev_rank_flag_fpaths, + msg=f'wait for previous rank {rank - 1}', + ) # Note: this should not be a real job worker.submit(command, bookkeeper=1) @@ -560,7 +595,10 @@ def order_jobs(self) -> None: rank_flag_fpaths = [] num_rank_workers = len(rank_workers) for worker_idx, worker in enumerate(rank_workers): - rank_flag_fpath = flag_dpath / 
f'rank_flag_{rank}_{worker_idx}_{num_rank_workers}.done' + rank_flag_fpath = ( + flag_dpath + / f'rank_flag_{rank}_{worker_idx}_{num_rank_workers}.done' + ) command = self._semaphore_signal_command(rank_flag_fpath) # Note: this should not be a real job worker.submit(command, bookkeeper=1) @@ -576,26 +614,32 @@ def order_jobs(self) -> None: def finalize_text(self, **kwargs: Any) -> str: self.order_jobs() # Create a driver script - driver_lines = [ub.codeblock( - f''' + driver_lines = [ + ub.codeblock( + f""" #!/bin/bash # Driver script to start the tmux-queue echo "Submitting {self.num_real_jobs} jobs to a tmux queue" - ''')] + """ + ) + ] for queue in self.workers: # run_command_in_tmux_queue(command, name) # TODO: figure out how to forward environment variables from the # running sessions. We dont want to log secrets to plaintext. part = ub.codeblock( - f''' + f""" ### Run Queue: {queue.pathid} with {len(queue)} jobs tmux new-session -d -s {queue.pathid} "bash" tmux send -t {queue.pathid} \\ "source {queue.fpath}" \\ Enter - ''').format() + """ + ).format() driver_lines.append(part) - driver_lines += [f'echo "Spread jobs across {len(self.workers)} tmux workers"'] + driver_lines += [ + f'echo "Spread jobs across {len(self.workers)} tmux workers"' + ] driver_text = '\n\n'.join(driver_lines) return driver_text @@ -611,7 +655,10 @@ def kill_other_queues(self, ask_first: bool = True) -> None: cmd_queue and kill them. 
""" import parse - queue_name_pattern = parse.Parser(self._tmux_session_prefix + '{name}_{rootid}') + + queue_name_pattern = parse.Parser( + self._tmux_session_prefix + '{name}_{rootid}' + ) current_sessions = self._tmux_current_sessions() other_session_ids = [] for info in current_sessions: @@ -621,7 +668,9 @@ def kill_other_queues(self, ask_first: bool = True) -> None: other_session_ids.append(info['id']) # print(f'other_session_ids={other_session_ids}') if other_session_ids: - print(f'Detected {len(other_session_ids)} other running cmd-queue sessions with the same name') + print( + f'Detected {len(other_session_ids)} other running cmd-queue sessions with the same name' + ) print('Commands to kill them:') kill_commands = [] for sess_id in other_session_ids: @@ -629,13 +678,17 @@ def kill_other_queues(self, ask_first: bool = True) -> None: print(command2) kill_commands.append(command2) from rich import prompt - if not ask_first or prompt.Confirm().ask('Do you want to kill the other sessions?'): + + if not ask_first or prompt.Confirm().ask( + 'Do you want to kill the other sessions?' + ): for command in kill_commands: ub.cmd(command, verbose=self.cmd_verbose) def handle_other_sessions(self, other_session_handler: str) -> None: if other_session_handler == 'auto': from cmd_queue.tmux_queue import has_stdin + if has_stdin(): other_session_handler = 'ask' else: @@ -658,6 +711,7 @@ def run( with_textual: str = 'auto', check_other_sessions: Optional[bool] = None, other_session_handler: str = 'auto', + monitor: str = 'hybrid', **kw: Any, ) -> None: """ @@ -669,6 +723,26 @@ def run( with the same queue name. Can be 'kill', 'ask', or 'ignore', or 'auto' - which defaults to 'ask' if stdin is available and 'kill' if it is not. + + monitor (str): + Where the live status UI runs while ``block=True``. + + * ``'hybrid'`` (default): renders the inline UI in the + current shell *and* spawns a detached ``cmd_queue + monitor`` tmux session alongside. 
Press ``[a]`` from + the inline UI to attach (or switch-client) to the + tmux session; ``[q]`` stops watching. The side + session is killed when the inline monitor exits. + * ``'inline'``: renders only in the current shell. No + tmux session is spawned. Closing the shell loses the + view. + * ``'tmux'``: spawns ``cmd_queue monitor --manifest=...`` + only in a detached tmux session. The current process + blocks until jobs finish (and runs the post-run + cleanup), so detaching the tmux UI does not return + control to the caller. + * ``'none'``: no UI; the call still blocks via a headless + state-file poll when ``block=True``. """ if not self.is_available(): @@ -680,7 +754,8 @@ def run( if check_other_sessions: ub.schedule_deprecation( - 'tmux_queue', 'check_other_sessions', 'argument') + 'tmux_queue', 'check_other_sessions', 'argument' + ) if check_other_sessions == 'auto': if not has_stdin(): check_other_sessions = False @@ -688,16 +763,201 @@ def run( self.kill_other_queues(ask_first=True) self.write() - ub.cmd(f'bash {self.fpath}', verbose=self.cmd_verbose, check=True, - system=system) - if block: - agg_state = self.monitor(with_textual=with_textual) - if onexit == 'capture': - self.capture() - if not agg_state['failed']: - if onfail == 'kill': - self.kill() + manifest_path = self._write_monitor_manifest() + ub.cmd( + f'bash {self.fpath}', + verbose=self.cmd_verbose, + check=True, + system=system, + ) + if not block: + return None + return self._dispatch_monitor( + monitor=monitor, + manifest_path=manifest_path, + onfail=onfail, + onexit=onexit, + with_textual=with_textual, + ) + + def _print_done_summary(self, agg_state: Dict[str, Any]) -> None: + from rich import print as rich_print + + failed = agg_state.get('failed', 0) + passed = agg_state.get('passed', 0) + skipped = agg_state.get('skipped', 0) + total = agg_state.get('total', 0) + if failed: + status_str = '[bold red]FAILED[/bold red]' + else: + status_str = '[bold green]PASSED[/bold green]' + rich_print( + 
f'\nQueue complete: {status_str} ' + f'passed=[green]{passed}[/green] ' + f'failed=[red]{failed}[/red] ' + f'skipped=[yellow]{skipped}[/yellow] ' + f'total={total}' + ) + failed_jobs, skipped_jobs, status_by_name = ( + self._collect_failed_and_skipped() + ) + if failed_jobs: + rich_print('[bold red]Failed jobs:[/bold red]') + any_log_missing = False + for job in failed_jobs: + log_fpath = getattr(job, 'log_fpath', None) + if ( + getattr(job, 'log', False) + and log_fpath is not None + and log_fpath.exists() + ): + rich_print(f' [red]{job.name}[/red] log: {log_fpath}') + else: + any_log_missing = True + rich_print(f' [red]{job.name}[/red] [dim](no log)[/dim]') + if any_log_missing: + rich_print( + '[yellow]Note:[/yellow] failure logs are not ' + 'enabled for some failed jobs (pass log=True at ' + 'submit time to capture stdout/stderr to disk).' + ) + if skipped_jobs: + rich_print('[bold yellow]Skipped jobs:[/bold yellow]') + for job in skipped_jobs: + reason = self._skip_reason(job, status_by_name) + if reason: + rich_print(f' [yellow]{job.name}[/yellow] ({reason})') + else: + rich_print(f' [yellow]{job.name}[/yellow]') + + def _dispatch_monitor( + self, + monitor: str, + manifest_path: Any, + onfail: str, + onexit: str, + with_textual: str = 'auto', + ) -> Any: + if monitor == 'inline': + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + ) + if monitor == 'hybrid': + side_session = None + if ub.find_exe('tmux'): + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + side_session = f'cmdq-monitor-{self.pathid}' + from rich import print as rich_print + + rich_print( + f'[dim]Spawned attachable monitor in tmux session[/dim] ' + f'{side_session} [dim](press [a] to attach)[/dim]' + ) + tmux.spawn_monitor_session( + session_name=side_session, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + else: + import warnings + + 
warnings.warn( + "monitor='hybrid' requested but tmux not found; " + 'falling back to inline-only monitor.' + ) + try: + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + side_session=side_session, + ) + finally: + if side_session and tmux.has_session(side_session): + tmux.kill_session(side_session, verbose=0) + if monitor == 'none': + from rich import print as rich_print + + rich_print( + '[bold]Queue running detached.[/bold] ' + f'Reattach with: cmd_queue monitor --manifest={manifest_path}' + ) + agg_state = self._headless_block_until_done() + self._print_done_summary(agg_state) + return agg_state + if monitor == 'tmux': + if not ub.find_exe('tmux'): + import warnings + + warnings.warn( + "monitor='tmux' requested but tmux not found; " + 'falling back to inline monitor.' + ) + return self.monitor( + with_textual=with_textual, + onfail=onfail, + onexit=onexit, + ) + extra_args = [] + if onfail: + extra_args.append(f'--onfail={onfail}') + if onexit: + extra_args.append(f'--onexit={onexit}') + session_name = f'cmdq-monitor-{self.pathid}' + from rich import print as rich_print + + rich_print( + f'[bold]Launching monitor in tmux session[/bold] {session_name}' + ) + tmux.spawn_monitor_session( + session_name=session_name, + manifest_path=manifest_path, + attach=False, + verbose=0, + extra_args=extra_args, + ) + + # Don't pull the user's terminal into the monitor session; let + # them attach on demand and freely detach back to this shell. 
+ def _is_finished() -> bool: + _, finished, _ = self._build_status_table() + return finished + + tmux.block_with_attach_prompt( + session_name=session_name, + is_finished_fn=_is_finished, + refresh_rate=1.0, + label=f'queue {self.name}', + ) + _, _, agg_state = self._build_status_table() + self._print_done_summary(agg_state) return agg_state + raise ValueError( + "monitor must be one of 'hybrid', 'inline', 'tmux', 'none'; " + f'got {monitor!r}' + ) + + def _headless_block_until_done(self, refresh_rate: float = 1.0) -> Any: + """Poll the per-worker state files until all workers are finished. + + Used as the parent-side block-wait when the visible monitor is + running elsewhere (in a tmux session, or not at all). + """ + import time + + while True: + table, finished, agg_state = self._build_status_table() + if finished: + return agg_state + time.sleep(refresh_rate) def read_state(self) -> Any: agg_state = {} @@ -734,9 +994,34 @@ def serial_run(self) -> None: for fpath in queue_fpaths: ub.cmd(f'{fpath}', verbose=self.cmd_verbose, check=True) - def monitor(self, refresh_rate: float = 0.4, with_textual: str = 'auto') -> None: + def monitor( + self, + refresh_rate: float = 0.4, + with_textual: str | bool = 'auto', + onfail: str = '', + onexit: str = '', + side_session: Optional[str] = None, + ) -> None: """ - Monitor progress until the jobs are done + Monitor progress until the jobs are done. + + Owns post-run cleanup so that whether the monitor runs inline or + in a separate process (tmux monitor backend, ``cmd_queue + monitor`` CLI), the same finalization happens. + + Args: + onfail (str): if ``'kill'`` and the queue ends with no + failures, kill the now-idle tmux sessions. (The arg is + named for historical reasons; the original behavior was + "tear down on a clean exit, leave alive on failure so + the user can investigate.") + onexit (str): if ``'capture'``, dump tmux pane contents + after the queue finishes. 
+ side_session (str | None): name of an attachable tmux + monitor session running alongside the inline UI. When + set, the inline monitor binds ``a`` to attach (or + switch-client) to this session. Caller is responsible + for spawning/cleaning up the session. CommandLine: xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:0 @@ -789,13 +1074,18 @@ def monitor(self, refresh_rate: float = 0.4, with_textual: str = 'auto') -> None with_textual = False if with_textual: - self._textual_monitor() + self._textual_monitor(side_session=side_session) else: - self._simple_rich_monitor(refresh_rate) + self._simple_rich_monitor(refresh_rate, side_session=side_session) table, finished, agg_state = self._build_status_table() + if onexit == 'capture': + self.capture() + if onfail == 'kill' and not agg_state.get('failed'): + self.kill() + self._print_done_summary(agg_state) return agg_state - def _textual_monitor(self): + def _textual_monitor(self, side_session: Optional[str] = None): from rich import print as rich_print if 0: @@ -806,46 +1096,197 @@ def _textual_monitor(self): is_running = True while is_running: table_fn = self._build_status_table - app = CmdQueueMonitorApp(table_fn, kill_fn=self.kill) + app = CmdQueueMonitorApp( + table_fn, kill_fn=self.kill, attach_session=side_session + ) app.run() table, finished, agg_state = self._build_status_table() rich_print(table) + if getattr(app, 'attach_requested', False): + # User pressed 'a' inside the textual UI; perform the + # attach (or switch-client) now that textual has released + # the terminal, then re-enter the textual loop. 
+ app.attach_requested = False + if side_session is not None: + _attach_or_switch(side_session) + continue + if app.graceful_exit: is_running = False else: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() is_running = False - def _simple_rich_monitor(self, refresh_rate=0.4): - import time - from rich.live import Live + def _collect_failed_and_skipped(self): + """Walk worker.jobs and partition into failed / skipped lists. + + A job is *failed* if its fail_fpath exists, and *skipped* if its + skip_fpath exists. The two are mutually exclusive: the bash + boilerplate writes one or the other but never both. + """ + failed = [] + skipped = [] + # Map job name -> status so we can fill in skip reasons. + status_by_name: Dict[str, str] = {} + for worker in self.workers: + for job in getattr(worker, 'jobs', []): + fail_fpath = getattr(job, 'fail_fpath', None) + skip_fpath = getattr(job, 'skip_fpath', None) + if fail_fpath is not None and fail_fpath.exists(): + failed.append(job) + if getattr(job, 'name', None): + status_by_name[job.name] = 'failed' + elif skip_fpath is not None and skip_fpath.exists(): + skipped.append(job) + if getattr(job, 'name', None): + status_by_name[job.name] = 'skipped' + return failed, skipped, status_by_name + + @staticmethod + def _skip_reason(job: Any, status_by_name: Dict[str, str]) -> str: + """Best-effort explanation of why a job was skipped. + + Looks at the job's recorded dependency names and reports the + first one whose status is not 'passed'. Returns a short string + like 'dep proc-A failed' or '' if no clear reason. 
+ """ + depends = getattr(job, 'depends', None) or [] + bad = [] + for dep_name in depends: + if not dep_name: + continue + st = status_by_name.get(dep_name) + if st in ('failed', 'skipped'): + bad.append((dep_name, st)) + if not bad: + return '' + if len(bad) == 1: + name, st = bad[0] + return f'dep {name} {st}' + names = ', '.join(f'{n} {s}' for n, s in bad) + return f'deps: {names}' + + def _build_failed_jobs_renderable(self) -> Any: + """Renderable summary of failed and skipped jobs, or None. + + Used by the live monitor to surface failures and skips (and the + reason for each skip) as soon as they happen, rather than only + in the post-run summary. + """ + failed, skipped, status_by_name = self._collect_failed_and_skipped() + if not failed and not skipped: + return None + from rich.console import Group + from rich.table import Table + from rich.text import Text + + renderables = [] + any_log_missing = False + + if failed: + ftable = Table( + title='Failed jobs', + title_style='bold red', + show_header=True, + header_style='red', + ) + ftable.add_column('name', style='red') + ftable.add_column('log') + for job in failed: + log_fpath = getattr(job, 'log_fpath', None) + if ( + getattr(job, 'log', False) + and log_fpath is not None + and log_fpath.exists() + ): + ftable.add_row(job.name, str(log_fpath)) + else: + any_log_missing = True + ftable.add_row(job.name, '[dim](no log)[/dim]') + renderables.append(ftable) + + if skipped: + stable = Table( + title='Skipped jobs', + title_style='bold yellow', + show_header=True, + header_style='yellow', + ) + stable.add_column('name', style='yellow') + stable.add_column('reason') + for job in skipped: + reason = self._skip_reason(job, status_by_name) + stable.add_row(job.name, reason or '[dim](unknown)[/dim]') + renderables.append(stable) + + if any_log_missing: + renderables.append( + Text( + 'Note: failure logs are not enabled for some failed ' + 'jobs (pass log=True at submit time).', + style='yellow', + ) + ) + + if 
len(renderables) == 1: + return renderables[0] + return Group(*renderables) + + def _build_live_renderable(self, side_session: Optional[str] = None): + from rich.console import Group + + table, finished, agg_state = self._build_status_table() + failed = self._build_failed_jobs_renderable() + hint = _attach_hint_renderable(side_session) if side_session else None + parts = [p for p in (table, failed, hint) if p is not None] + renderable = Group(*parts) if len(parts) > 1 else parts[0] + return renderable, finished, agg_state + + def _simple_rich_monitor( + self, refresh_rate=0.4, side_session: Optional[str] = None + ): + import sys + if 0: print('Kill commands:') for command in self._kill_commands(): print(command) + + use_keys = side_session is not None and sys.stdin.isatty() try: - table, finished, agg_state = self._build_status_table() - with Live(table, refresh_per_second=4) as live: - while not finished: - time.sleep(refresh_rate) - table, finished, agg_state = self._build_status_table() - live.update(table) + _run_live_with_attach( + build_renderable=lambda: self._build_live_renderable( + side_session=side_session, + ), + refresh_rate=refresh_rate, + side_session=side_session if use_keys else None, + ) except KeyboardInterrupt: from rich.prompt import Confirm + flag = Confirm.ask('do you to kill the procs?') if flag: self.kill() def _build_status_table(self): from rich.table import Table + # https://rich.readthedocs.io/en/stable/live.html table = Table() - columns = ['tmux session name', 'status', 'passed', 'failed', 'skipped', 'total'] + columns = [ + 'tmux session name', + 'status', + 'passed', + 'failed', + 'skipped', + 'total', + ] for col in columns: table.add_column(col) @@ -856,7 +1297,7 @@ def _build_status_table(self): 'failed': 0, 'passed': 0, 'skipped': 0, - 'total': 0 + 'total': 0, } for worker in self.workers: @@ -868,12 +1309,12 @@ def _build_status_table(self): finished = False pass_color = '[yellow]' else: - finished &= (state['status'] == 
'done') + finished &= state['status'] == 'done' if state['status'] == 'done': pass_color = '[green]' - if (state['failed'] > 0): + if state['failed'] > 0: fail_color = '[red]' - if (state['skipped'] > 0): + if state['skipped'] > 0: skip_color = '[yellow]' agg_state['total'] += state['total'] @@ -884,10 +1325,10 @@ def _build_status_table(self): table.add_row( state['name'], state['status'], - f"{pass_color}{state['passed']}", - f"{fail_color}{state['failed']}", - f"{skip_color}{state['skipped']}", - f"{state['total']}", + f'{pass_color}{state["passed"]}', + f'{fail_color}{state["failed"]}', + f'{skip_color}{state["skipped"]}', + f'{state["total"]}', ) if not finished: @@ -899,10 +1340,10 @@ def _build_status_table(self): table.add_row( agg_state['name'], agg_state['status'], - f"{agg_state['passed']}", - f"{agg_state['failed']}", - f"{agg_state['skipped']}", - f"{agg_state['total']}", + f'{agg_state["passed"]}', + f'{agg_state["failed"]}', + f'{agg_state["skipped"]}', + f'{agg_state["total"]}', ) return table, finished, agg_state @@ -963,7 +1404,9 @@ def current_output(self) -> None: for queue in self.workers: print('\n\nqueue = {!r}'.format(queue)) # First print out the contents for debug - tmux.capture_pane(target_session=queue.pathid, verbose=self.cmd_verbose) + tmux.capture_pane( + target_session=queue.pathid, verbose=self.cmd_verbose + ) def _print_commands(self): # First print out the contents for debug @@ -989,9 +1432,216 @@ def _tmux_current_sessions(self): sessions = tmux.list_sessions() return sessions + def _build_monitor_manifest(self) -> Dict[str, Any]: + """Snapshot enough state for an out-of-process monitor to reattach.""" + workers_info = [] + for worker in self.workers: + jobs_info = [] + for job in getattr(worker, 'jobs', []): + fail_fpath = getattr(job, 'fail_fpath', None) + skip_fpath = getattr(job, 'skip_fpath', None) + log_fpath = getattr(job, 'log_fpath', None) + depends = getattr(job, 'depends', None) or [] + depends_names = [ + getattr(d, 
'name', None) + for d in depends + if d is not None and getattr(d, 'name', None) + ] + jobs_info.append( + { + 'name': getattr(job, 'name', None), + 'log': bool(getattr(job, 'log', False)), + 'fail_fpath': str(fail_fpath) if fail_fpath else None, + 'skip_fpath': str(skip_fpath) if skip_fpath else None, + 'log_fpath': str(log_fpath) if log_fpath else None, + 'depends': depends_names, + } + ) + workers_info.append( + { + 'name': worker.name, + 'rootid': worker.rootid, + 'dpath': str(worker.dpath), + 'pathid': worker.pathid, + 'state_fpath': str(worker.state_fpath), + 'fpath': str(worker.fpath), + 'environ': dict(worker.environ or {}), + 'jobs': jobs_info, + } + ) + return { + 'backend': 'tmux', + 'name': self.name, + 'rootid': self.rootid, + 'pathid': self.pathid, + 'dpath': str(self.dpath), + 'fpath': str(self.fpath), + 'size': self.size, + 'gpus': self.gpus, + 'tmux_session_prefix': self._tmux_session_prefix, + 'workers': workers_info, + } + + def _write_monitor_manifest(self) -> Any: + """Persist the monitor manifest to ``/monitor_manifest.json``.""" + from cmd_queue import monitor_manifest as mm + + path = mm.manifest_path_for_dpath(self.dpath) + manifest = self._build_monitor_manifest() + mm.write_manifest(manifest, path) + mm.update_active_index(self.name, path) + return path + + @classmethod + def _from_manifest(cls, manifest: Dict[str, Any]) -> 'TMUXMultiQueue': + """Reconstruct a queue suitable for ``monitor()`` / ``kill()`` only.""" + self = cls.__new__(cls) + # Initialize the base Queue state without re-creating workers / dpaths. 
+ base_queue.Queue.__init__(self) + self.name = manifest['name'] + self.rootid = manifest['rootid'] + self.pathid = manifest.get( + 'pathid', '{}_{}'.format(self.name, self.rootid) + ) + self.dpath = ub.Path(manifest['dpath']) + self.fpath = ub.Path(manifest['fpath']) + self.size = manifest['size'] + self.gpus = manifest.get('gpus') + self.environ = {} + self.cmd_verbose = 2 + self._tmux_session_prefix = manifest.get('tmux_session_prefix', 'cmdq_') + self.job_info_dpath = self.dpath / 'job_info' + self.preamble = [] + self.jobs = [] + import types + + workers = [] + for w in manifest.get('workers', []): + worker = serial_queue.SerialQueue( + name=w['name'], + rootid=w['rootid'], + dpath=ub.Path(w['dpath']), + environ=w.get('environ') or {}, + ) + # Rehydrate lightweight job stubs so the monitor can show + # per-job failure rows. We don't need the full BashJob — only + # the attributes the failed-jobs renderer reads. + stubs = [] + for j in w.get('jobs') or []: + stubs.append( + types.SimpleNamespace( + name=j.get('name'), + log=bool(j.get('log', False)), + fail_fpath=ub.Path(j['fail_fpath']) + if j.get('fail_fpath') + else None, + skip_fpath=ub.Path(j['skip_fpath']) + if j.get('skip_fpath') + else None, + log_fpath=ub.Path(j['log_fpath']) + if j.get('log_fpath') + else None, + depends=list(j.get('depends') or []), + ) + ) + worker.jobs = stubs + workers.append(worker) + self.workers = workers + return self + + +def _attach_or_switch(session_name: str) -> None: + """Attach the user's terminal to ``session_name`` (or switch-client + if already inside tmux). 
Thin module-level shim around the static + method so call sites don't need to import the class.""" + tmux.attach_or_switch(session_name) + + +def _attach_hint_renderable(session_name: str) -> Any: + """Footer text shown beneath the live status table when an + attachable side-session exists, so the user can discover the + keybindings without reading the docs.""" + import os + + from rich.text import Text + + verb = 'switch-client' if os.environ.get('TMUX') else 'attach' + return Text.from_markup( + rf'[dim]Press \[a] to {verb} to monitor session ' + f"'{session_name}' • \\[q] to stop watching (queue keeps " + 'running)[/dim]' + ) + + +def _run_live_with_attach( + build_renderable: Any, + refresh_rate: float, + side_session: Optional[str], +) -> None: + """Run a ``rich.live.Live`` loop that also accepts ``a``/``q`` + keypresses when ``side_session`` is provided. + + The loop exits when the renderable's ``finished`` flag goes True + (queue done) or when the user presses ``q``. On ``a`` the Live + display is suspended, the user is attached to the side tmux + session, and the loop resumes after they detach. + """ + import sys + import time + + from rich.live import Live + + if side_session is None: + # Plain path with no input handling — preserves old behavior + # exactly when there is no side session to attach to. 
+ renderable, finished, _ = build_renderable() + with Live(renderable, refresh_per_second=4) as live: + while not finished: + time.sleep(refresh_rate) + renderable, finished, _ = build_renderable() + live.update(renderable) + return + + import select + import termios + import tty + + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + while True: + tty.setcbreak(fd) + attach_requested = False + renderable, finished, _ = build_renderable() + with Live(renderable, refresh_per_second=4) as live: + while not finished: + ready, _, _ = select.select( + [sys.stdin], [], [], refresh_rate + ) + if ready: + ch = sys.stdin.read(1) + if ch in ('a', 'A'): + attach_requested = True + break + if ch in ('q', 'Q'): + return + if ch == '\x03': # Ctrl-C + raise KeyboardInterrupt + renderable, finished, _ = build_renderable() + live.update(renderable) + if not attach_requested: + return + # Restore the terminal so tmux gets a clean tty, attach, + # then loop back into Live with cbreak re-enabled. 
+ termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + _attach_or_switch(side_session) + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + def has_stdin() -> bool: import sys + try: sys.stdin.fileno() except Exception: @@ -1003,10 +1653,11 @@ def has_stdin() -> bool: try: import textual # NOQA from cmd_queue.monitor_app import CmdQueueMonitorApp + if not hasattr(CmdQueueMonitorApp, 'run'): raise ImportError('Current textual monitor is broken on new versions') except ImportError: - CmdQueueMonitorApp = None + CmdQueueMonitorApp = None # type: ignore if 0: diff --git a/cmd_queue/util/__init__.py b/cmd_queue/util/__init__.py index 464dec2..ea91353 100644 --- a/cmd_queue/util/__init__.py +++ b/cmd_queue/util/__init__.py @@ -1,29 +1,32 @@ - def lazy_import(module_name, submodules, submod_attrs): import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name): if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( '{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -51,4 +54,5 @@ def __getattr__(name): def __dir__(): return __all__ + __all__ = ['textual_extensions', 'util_algo', 'util_networkx'] diff --git a/cmd_queue/util/richer.py b/cmd_queue/util/richer.py index 10b0d2d..915fbc9 100644 --- a/cmd_queue/util/richer.py +++ b/cmd_queue/util/richer.py @@ -23,28 +23,32 @@ def lazy_import( ) -> 
Callable[[str], Any]: import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name: str) -> Any: if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( '{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -125,14 +129,66 @@ def __getattr__(name: str) -> Any: def __dir__() -> list[str]: return __all__ -__all__: list[str] = ['abc', 'align', 'ansi', 'bar', 'box', 'cells', 'color', - 'color_triplet', 'columns', 'console', 'constrain', 'containers', - 'control', 'default_styles', 'diagnose', 'emoji', 'errors', - 'file_proxy', 'filesize', 'get_console', 'highlighter', 'inspect', - 'json', 'jupyter', 'layout', 'live', 'live_render', 'logging', - 'markdown', 'markup', 'measure', 'padding', 'pager', 'palette', - 'panel', 'pretty', 'print', 'progress', 'progress_bar', 'prompt', - 'protocol', 'reconfigure', 'region', 'repr', 'rule', 'scope', - 'screen', 'segment', 'spinner', 'status', 'style', 'styled', - 'syntax', 'table', 'terminal_theme', 'text', 'theme', 'themes', - 'traceback', 'tree'] + +__all__: list[str] = [ + 'abc', + 'align', + 'ansi', + 'bar', + 'box', + 'cells', + 'color', + 'color_triplet', + 'columns', + 'console', + 'constrain', + 'containers', + 'control', + 'default_styles', + 'diagnose', + 'emoji', + 'errors', + 'file_proxy', + 'filesize', + 'get_console', + 'highlighter', + 'inspect', + 'json', + 
'jupyter', + 'layout', + 'live', + 'live_render', + 'logging', + 'markdown', + 'markup', + 'measure', + 'padding', + 'pager', + 'palette', + 'panel', + 'pretty', + 'print', + 'progress', + 'progress_bar', + 'prompt', + 'protocol', + 'reconfigure', + 'region', + 'repr', + 'rule', + 'scope', + 'screen', + 'segment', + 'spinner', + 'status', + 'style', + 'styled', + 'syntax', + 'table', + 'terminal_theme', + 'text', + 'theme', + 'themes', + 'traceback', + 'tree', +] diff --git a/cmd_queue/util/texter.py b/cmd_queue/util/texter.py index 0180539..aa6dff4 100644 --- a/cmd_queue/util/texter.py +++ b/cmd_queue/util/texter.py @@ -22,28 +22,32 @@ def lazy_import( ) -> Callable[[str], Any]: import importlib import os + name_to_submod = { - func: mod for mod, funcs in submod_attrs.items() - for func in funcs + func: mod for mod, funcs in submod_attrs.items() for func in funcs } def __getattr__(name: str) -> Any: if name in submodules: attr = importlib.import_module( '{module_name}.{name}'.format( - module_name=module_name, name=name) + module_name=module_name, name=name + ) ) elif name in name_to_submod: submodname = name_to_submod[name] module = importlib.import_module( '{module_name}.{submodname}'.format( - module_name=module_name, submodname=submodname) + module_name=module_name, submodname=submodname + ) ) attr = getattr(module, name) else: raise AttributeError( 'No {module_name} attribute {name}'.format( - module_name=module_name, name=name)) + module_name=module_name, name=name + ) + ) globals()[name] = attr return attr @@ -92,9 +96,30 @@ def __getattr__(name: str) -> Any: def __dir__() -> list[str]: return __all__ + __all__: list[str] = [ - 'actions', 'app', 'background', 'binding', 'case', 'driver', - 'drivers', 'events', 'geometry', 'keys', 'layout', 'layout_map', - 'layouts', 'message', 'message_pump', 'messages', 'page', - 'reactive', 'screen_update', 'scrollbar', 'view', 'views', 'widget', - 'widgets'] + 'actions', + 'app', + 'background', + 'binding', + 'case', + 
'driver', + 'drivers', + 'events', + 'geometry', + 'keys', + 'layout', + 'layout_map', + 'layouts', + 'message', + 'message_pump', + 'messages', + 'page', + 'reactive', + 'screen_update', + 'scrollbar', + 'view', + 'views', + 'widget', + 'widgets', +] diff --git a/cmd_queue/util/textual_extensions.py b/cmd_queue/util/textual_extensions.py index 7d08690..0c1410c 100644 --- a/cmd_queue/util/textual_extensions.py +++ b/cmd_queue/util/textual_extensions.py @@ -1,27 +1,24 @@ from __future__ import annotations -# mypy: ignore-errors - from typing import Any -# from typing import Any - try: - from textual.app import App # from textual.driver import Driver # from typing import Type # from rich.console import Console import asyncio - - # from textual import events - from textual.widget import Widget - from textual.reactive import watch, Reactive from datetime import datetime + + from rich.console import RenderableType from rich.panel import Panel + from rich.repr import Result from rich.style import StyleType from rich.table import Table - from rich.console import RenderableType - from rich.repr import Result + from textual.app import App + from textual.reactive import Reactive, watch + + # from textual import events + from textual.widget import Widget except ImportError: App: type = object Widget: type = object @@ -47,12 +44,15 @@ class class_or_instancemethod(classmethod): >>> print(X().foo()) bound to the instance """ - def __get__(self, instance: Any, type_: Any) -> Any: - descr_get = super().__get__ if instance is None else self.__func__.__get__ + + def __get__(self, instance: Any, type_: Any) -> Any: # type: ignore + descr_get = ( + super().__get__ if instance is None else self.__func__.__get__ # type: ignore + ) return descr_get(instance, type_) -class InstanceRunnableApp(App): +class InstanceRunnableApp(App): # type: ignore """ Extension of App that allows for running an instance @@ -95,6 +95,7 @@ def _run_as_cls( """ Original classmethod logic """ + async def 
run_app() -> None: app = cls(screen=screen, driver_class=driver, **kwargs) await app.process_messages() @@ -117,16 +118,19 @@ def _run_as_instance( if kwargs.get('title', None) is not None: self._title = kwargs.pop('title') if kwargs.get('log', None) is not None: - self.log_file = open(kwargs.pop('log'), "wt") + self.log_file = open(kwargs.pop('log'), 'wt') if kwargs.get('log_verbosity', None) is not None: self.log_verbosity = kwargs.pop('log_verbosity') if len(kwargs): raise ValueError( 'Cannot pass unhandled kwargs when running as an ' 'instance method. Assuming that instance variables ' - 'are already setup.') + 'are already setup.' + ) + async def run_app() -> None: await self.process_messages() + asyncio.run(run_app()) # Allow for use of run as a instance or classmethod @@ -146,23 +150,22 @@ def run( """ if isinstance(cls_or_self, type): # Running as a class method - cls_or_self._run_as_cls( - screen=screen, driver=driver, **kwargs) + cls_or_self._run_as_cls(screen=screen, driver=driver, **kwargs) else: # Running as an instance method - cls_or_self._run_as_instance( - screen=screen, driver=driver, **kwargs) + cls_or_self._run_as_instance(screen=screen, driver=driver, **kwargs) try: - class ExtHeader(Widget): - """ - """ + + class ExtHeader(Widget): # type: ignore + """ """ + def __init__( self, *, tall: bool = True, - style: str = "white on dark_green", + style: str = 'white on dark_green', clock: bool = True, ) -> None: """ @@ -175,14 +178,18 @@ def __init__( self.clock = clock tall: Reactive[bool] = Reactive(True, layout=True) - style: Reactive[StyleType] = Reactive("white on blue") + style: Reactive[StyleType] = Reactive('white on blue') clock: Reactive[bool] = Reactive(True) - title: Reactive[str] = Reactive("") - sub_title: Reactive[str] = Reactive("") + title: Reactive[str] = Reactive('') + sub_title: Reactive[str] = Reactive('') @property def full_title(self) -> str: - return f"{self.title} - {self.sub_title}" if self.sub_title else self.title + return 
( + f'{self.title} - {self.sub_title}' + if self.sub_title + else self.title + ) def __rich_repr__(self) -> Result: yield self.title @@ -191,19 +198,23 @@ async def watch_tall(self, tall: bool) -> None: self.layout_size = 3 if tall else 1 def get_clock(self) -> str: - return datetime.now().time().strftime("%X") + return datetime.now().time().strftime('%X') def render(self) -> RenderableType: header_table = Table.grid(padding=(0, 1), expand=True) header_table.style = self.style - header_table.add_column(justify="left", ratio=0, width=8) - header_table.add_column("title", justify="center", ratio=1) - header_table.add_column("clock", justify="right", width=8) + header_table.add_column(justify='left', ratio=0, width=8) + header_table.add_column('title', justify='center', ratio=1) + header_table.add_column('clock', justify='right', width=8) header_table.add_row( - "⚡", self.full_title, self.get_clock() if self.clock else "" + '⚡', self.full_title, self.get_clock() if self.clock else '' ) header: RenderableType - header = Panel(header_table, style=self.style) if self.tall else header_table + header = ( + Panel(header_table, style=self.style) + if self.tall + else header_table + ) return header async def on_mount(self, event: Any) -> None: @@ -219,8 +230,8 @@ async def set_title(title: str) -> None: async def set_sub_title(sub_title: str) -> None: self.sub_title = sub_title - watch(self.app, "title", set_title) - watch(self.app, "sub_title", set_sub_title) + watch(self.app, 'title', set_title) + watch(self.app, 'sub_title', set_sub_title) async def on_click(self, event: Any) -> None: """ @@ -229,4 +240,4 @@ async def on_click(self, event: Any) -> None: """ self.tall = not self.tall except Exception: - ExtHeader = None + ExtHeader = None # type: ignore diff --git a/cmd_queue/util/util_algo.py b/cmd_queue/util/util_algo.py index 8e7a802..b32ff20 100644 --- a/cmd_queue/util/util_algo.py +++ b/cmd_queue/util/util_algo.py @@ -1,11 +1,14 @@ from __future__ import annotations 
+from collections.abc import Sequence from typing import List import numpy as np -def balanced_number_partitioning(items: np.ndarray, num_parts: int) -> List[np.ndarray]: +def balanced_number_partitioning( + items: np.ndarray | Sequence, num_parts: int +) -> List[np.ndarray]: """ Greedy approximation to multiway number partitioning diff --git a/cmd_queue/util/util_bash.py b/cmd_queue/util/util_bash.py index 0757bf7..3f0e497 100644 --- a/cmd_queue/util/util_bash.py +++ b/cmd_queue/util/util_bash.py @@ -38,15 +38,11 @@ def bash_json_dump(json_fmt_parts, fpath): \ > out.json """ - printf_body_parts = [ - '"{}": {}'.format(k, f) for k, f, v in json_fmt_parts - ] - printf_arg_parts = [ - '"{}"'.format(v) for k, f, v in json_fmt_parts - ] - printf_body = r"'{" + ", ".join(printf_body_parts) + r"}\n'" + printf_body_parts = ['"{}": {}'.format(k, f) for k, f, v in json_fmt_parts] + printf_arg_parts = ['"{}"'.format(v) for k, f, v in json_fmt_parts] + printf_body = r"'{" + ', '.join(printf_body_parts) + r"}\n'" printf_args = ' '.join(printf_arg_parts) redirect_part = '> ' + str(fpath) - printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args + printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args dump_code = printf_part + ' \\\n ' + redirect_part return dump_code diff --git a/cmd_queue/util/util_networkx.py b/cmd_queue/util/util_networkx.py index 747cc52..f2cb315 100644 --- a/cmd_queue/util/util_networkx.py +++ b/cmd_queue/util/util_networkx.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Iterable, Hashable +from typing import Any, Hashable, Iterable def is_topological_order(graph: Any, node_order: Iterable[Hashable]) -> bool: diff --git a/cmd_queue/util/util_tags.py b/cmd_queue/util/util_tags.py index baf58d0..8a0f459 100644 --- a/cmd_queue/util/util_tags.py +++ b/cmd_queue/util/util_tags.py @@ -30,6 +30,7 @@ def coerce( def intersection(self, other: Optional[Iterable[str]]) -> Optional[Tags]: import ubelt as ub + if other is 
None: return None isect = self.__class__(ub.oset(self) & set(other)) diff --git a/cmd_queue/util/util_tmux.py b/cmd_queue/util/util_tmux.py index 55a50fb..c61e32d 100644 --- a/cmd_queue/util/util_tmux.py +++ b/cmd_queue/util/util_tmux.py @@ -3,7 +3,7 @@ """ Generic tmux helpers """ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import ubelt as ub @@ -28,10 +28,7 @@ def list_sessions() -> List[Dict[str, str]]: line = line.strip() if line: session_id, rest = line.split(':', 1) - sessions.append({ - 'id': session_id, - 'rest': rest - }) + sessions.append({'id': session_id, 'rest': rest}) return sessions @staticmethod @@ -45,16 +42,220 @@ def _capture_pane_command(target_session: str) -> str: @staticmethod def capture_pane(target_session: str, verbose: int = 3) -> Any: - return ub.cmd(tmux._capture_pane_command(target_session), verbose=verbose) + return ub.cmd( + tmux._capture_pane_command(target_session), verbose=verbose + ) @staticmethod def kill_session(target_session: str, verbose: int = 3) -> Any: - return ub.cmd(tmux._kill_session_command(target_session), verbose=verbose) + return ub.cmd( + tmux._kill_session_command(target_session), verbose=verbose + ) @staticmethod def kill_pane(pane_id: str, verbose: int = 3) -> Any: return ub.cmd(f'tmux kill-pane -t {pane_id}', verbose=verbose) + @staticmethod + def is_inside() -> bool: + """True if the current process is running inside a tmux session.""" + import os + + return bool(os.environ.get('TMUX')) + + @staticmethod + def has_session(target_session: str) -> bool: + info = ub.cmd(['tmux', 'has-session', '-t', target_session]) + return info['ret'] == 0 + + @staticmethod + def spawn_monitor_session( + session_name: str, + manifest_path: Any, + attach: bool = True, + verbose: int = 0, + extra_args: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Start ``cmd_queue monitor --manifest=`` in a detached tmux + session and (optionally) attach the user to it. 
+ + Returns a dict describing what was created and how to reattach. + """ + import os + import shlex + import sys + + if not ub.find_exe('tmux'): + raise RuntimeError('tmux is not available') + + # Always invoke the same Python interpreter that started run() — a + # globally-installed older ``cmd_queue`` binary on PATH would not + # know about the monitor subcommand. + cmd_parts = [ + sys.executable, + '-m', + 'cmd_queue', + 'monitor', + '--manifest=' + str(manifest_path), + ] + if extra_args: + cmd_parts.extend(extra_args) + # After the monitor exits, drop into an interactive shell so the + # pane stays alive and the user can scroll up to read the final + # status table without a synthetic prompt blocking dismissal. + inner = ' '.join(shlex.quote(p) for p in cmd_parts) + bash_payload = f'{inner}; exec bash' + new_session_cmd = [ + 'tmux', + 'new-session', + '-d', + '-s', + session_name, + 'bash', + '-lc', + bash_payload, + ] + ub.cmd(new_session_cmd, verbose=verbose, check=True) + + info: Dict[str, Any] = { + 'session_name': session_name, + 'attach_command': f'tmux attach -t {session_name}', + } + if attach: + inside = bool(os.environ.get('TMUX')) + if inside: + # Switching the current client is the in-tmux equivalent of + # attach; spawning a nested attach is rejected by tmux. + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + verbose=verbose, + check=True, + ) + info['attached_via'] = 'switch-client' + else: + # ``attach-session`` is interactive, so let the foreground + # process inherit the tty. 
+ ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + verbose=verbose, + check=False, + ) + info['attached_via'] = 'attach-session' + return info + + @staticmethod + def block_with_attach_prompt( + session_name: str, + is_finished_fn: Any, + refresh_rate: float = 1.0, + label: str = 'queue', + ) -> None: + """ + Block until ``is_finished_fn()`` returns truthy, while letting the + user press ``a`` to attach (or switch) to the given tmux session + and ``q`` / ``d`` to stop watching from the parent shell. + + On a non-TTY stdin (e.g. piped invocation, CI), falls back to a + silent polling loop. + + Args: + session_name: target tmux session for the attach action. + is_finished_fn: zero-arg callable returning True when the + queue is done. + refresh_rate: how often (seconds) to re-check completion and + poll for keypresses. + label: short noun used in the user-facing prompt. + """ + import os + import sys + import time + + if not sys.stdin.isatty(): + while not is_finished_fn(): + time.sleep(refresh_rate) + return + + import select + import termios + import tty + + inside_tmux = bool(os.environ.get('TMUX')) + attach_cmd = ( + f'tmux switch-client -t {session_name}' + if inside_tmux + else f'tmux attach -t {session_name}' + ) + print(f'Watching {label}.') + import rich + + rich.print( + rf'[bold]Press \[a][/bold] to attach to monitor session ({session_name})' + ) + rich.print( + r'[bold]Press \[q][/bold] to stop watching (queue keeps running).' + ) + print(f'Manual reattach anytime from another shell:\n{attach_cmd}') + + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + tty.setcbreak(fd) + while True: + if is_finished_fn(): + return + ready, _, _ = select.select([sys.stdin], [], [], refresh_rate) + if not ready: + continue + ch = sys.stdin.read(1) + if ch in ('a', 'A'): + # Restore terminal before tmux takes over the tty. 
+ termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + try: + if inside_tmux: + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + check=False, + ) + else: + ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + check=False, + ) + finally: + # Re-enter cbreak when the user detaches back. + tty.setcbreak(fd) + elif ch in ('q', 'Q', 'd', 'D'): + return + elif ch == '\x03': # Ctrl-C + raise KeyboardInterrupt + except KeyboardInterrupt: + return + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + + @staticmethod + def attach_or_switch(session_name: str) -> None: + """Bring ``session_name`` to the foreground for the user. + + Inside an existing tmux client, this issues ``switch-client`` so + we don't try to nest tmux. Otherwise we ``attach-session`` and + let the foreground process inherit the tty (the user can detach + with the usual binding to come back). + """ + import os + + if os.environ.get('TMUX'): + ub.cmd( + ['tmux', 'switch-client', '-t', session_name], + check=False, + ) + else: + ub.cmd( + ['tmux', 'attach-session', '-t', session_name], + check=False, + ) + @staticmethod def list_panes(target_session: str) -> List[Dict[str, str]]: """ @@ -68,50 +269,60 @@ def list_panes(target_session: str) -> List[Dict[str, str]]: print(f'rows = {ub.urepr(rows, nl=1)}') """ import json + # References: # https://github.com/tmux-python/libtmux/blob/f705713c7aff1b14e8f8f3ca53d1b0b6ba6e98d0/src/libtmux/formats.py#L80 PANE_FORMATS = [ - "pane_id", - "pane_index", - "pane_pid", - - "pane_active", - "pane_dead", - "pane_in_mode", - "pane_synchronized", - "pane_tty", - "pane_start_command", - "pane_start_path", - "pane_current_path", - "pane_current_command", - "cursor_x", - "cursor_y", - "scroll_region_upper", - "scroll_region_lower", - "saved_cursor_x", - "saved_cursor_y", - "alternate_on", - "alternate_saved_x", - "alternate_saved_y", - "cursor_flag", - "insert_flag", - "keypad_cursor_flag", - "keypad_flag", - "wrap_flag", - 
"mouse_standard_flag", - "mouse_button_flag", - "mouse_any_flag", - "mouse_utf8_flag", - "history_size", - "history_limit", - "history_bytes", - "pane_width", - "pane_height", + 'pane_id', + 'pane_index', + 'pane_pid', + 'pane_active', + 'pane_dead', + 'pane_in_mode', + 'pane_synchronized', + 'pane_tty', + 'pane_start_command', + 'pane_start_path', + 'pane_current_path', + 'pane_current_command', + 'cursor_x', + 'cursor_y', + 'scroll_region_upper', + 'scroll_region_lower', + 'saved_cursor_x', + 'saved_cursor_y', + 'alternate_on', + 'alternate_saved_x', + 'alternate_saved_y', + 'cursor_flag', + 'insert_flag', + 'keypad_cursor_flag', + 'keypad_flag', + 'wrap_flag', + 'mouse_standard_flag', + 'mouse_button_flag', + 'mouse_any_flag', + 'mouse_utf8_flag', + 'history_size', + 'history_limit', + 'history_bytes', + 'pane_width', + 'pane_height', # "pane_title", # removed in 3.1+ ] format_code = json.dumps({k: '#{' + k + '}' for k in PANE_FORMATS}) rows = [] - out: Any = ub.cmd(['tmux', 'list-panes', '-t', str(target_session), '-F', format_code], verbose=0) + out: Any = ub.cmd( + [ + 'tmux', + 'list-panes', + '-t', + str(target_session), + '-F', + format_code, + ], + verbose=0, + ) for line in out.stdout.strip().split('\n'): row = json.loads(line) rows.append(row) diff --git a/cmd_queue/util/util_yaml.py b/cmd_queue/util/util_yaml.py index 265f9c0..c247912 100644 --- a/cmd_queue/util/util_yaml.py +++ b/cmd_queue/util/util_yaml.py @@ -1,20 +1,21 @@ import io import os -import ubelt as ub +import ubelt as ub NEW_RUAMEL = 1 class _YamlRepresenter: - @staticmethod def str_presenter(dumper, data): # https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data if len(data.splitlines()) > 1 or '\n' in data: text_list = [line.rstrip() for line in data.splitlines()] fixed_data = '\n'.join(text_list) - return dumper.represent_scalar('tag:yaml.org,2002:str', fixed_data, style='|') + return dumper.represent_scalar( + 'tag:yaml.org,2002:str', 
fixed_data, style='|' + ) return dumper.represent_scalar('tag:yaml.org,2002:str', data) @@ -29,6 +30,7 @@ def _custom_ruaml_loader(): https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0 """ import ruamel.yaml + Loader = ruamel.yaml.RoundTripLoader def _construct_include_tag(self, node): @@ -38,10 +40,13 @@ def _construct_include_tag(self, node): else: external_fpath = ub.Path(node.value) if not external_fpath.exists(): - raise IOError(f'Included external yaml file {external_fpath} ' - 'does not exist') + raise IOError( + f'Included external yaml file {external_fpath} ' + 'does not exist' + ) return Yaml.load(node.value) - Loader.add_constructor("!include", _construct_include_tag) + + Loader.add_constructor('!include', _construct_include_tag) return Loader @@ -52,6 +57,7 @@ def _custom_ruaml_dumper(): https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags """ import ruamel.yaml + Dumper = ruamel.yaml.RoundTripDumper Dumper.add_representer(str, _YamlRepresenter.str_presenter) Dumper.add_representer(ub.udict, Dumper.represent_dict) @@ -64,6 +70,7 @@ def _custom_pyaml_dumper(): class Dumper(yaml.Dumper): pass + # dumper = yaml.dumper.Dumper # dumper = yaml.SafeDumper(sort_keys=False) # yaml.dump(data, s, Dumper=yaml.SafeDumper, sort_keys=False, width=float("inf")) @@ -102,16 +109,19 @@ def _custom_new_ruaml_yaml_obj(): >>> print(file.getvalue()) """ import ruamel.yaml + # make a new instance, although you could get the YAML # instance from the constructor argument - class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): + class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): # type: ignore ... - class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): + class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): # type: ignore ... 
CustomRepresenter.add_representer(str, _YamlRepresenter.str_presenter) - CustomRepresenter.add_representer(ub.udict, CustomRepresenter.represent_dict) + CustomRepresenter.add_representer( + ub.udict, CustomRepresenter.represent_dict + ) def _construct_include_tag(self, node): print(f'node={node}') @@ -122,14 +132,17 @@ def _construct_include_tag(self, node): else: external_fpath = ub.Path(value) if not external_fpath.exists(): - raise IOError(f'Included external yaml file {external_fpath} ' - 'does not exist') + raise IOError( + f'Included external yaml file {external_fpath} ' + 'does not exist' + ) # Not sure why we can't recurse here... # yaml_obj # print(f'yaml_obj={yaml_obj}') # import xdev # xdev.embed() return Yaml.load(value) + # Loader = ruamel.yaml.RoundTripLoader # Loader.add_constructor("!include", _construct_include_tag) @@ -139,7 +152,7 @@ def _construct_include_tag(self, node): yaml_obj.Constructor = CustomConstructor yaml_obj.Representer = CustomRepresenter yaml_obj.preserve_quotes = True - yaml_obj.width = float('inf') + yaml_obj.width = float('inf') # type: ignore return yaml_obj @@ -180,12 +193,18 @@ def dumps(data, backend='ruamel'): yaml_obj.dump(data, file) else: import ruamel.yaml + Dumper = _custom_ruaml_dumper() - ruamel.yaml.round_trip_dump(data, file, Dumper=Dumper, width=float("inf")) + ruamel.yaml.round_trip_dump( + data, file, Dumper=Dumper, width=float('inf') + ) elif backend == 'pyyaml': import yaml + Dumper = _custom_pyaml_dumper() - yaml.dump(data, file, Dumper=Dumper, sort_keys=False, width=float("inf")) + yaml.dump( + data, file, Dumper=Dumper, sort_keys=False, width=float('inf') + ) else: raise KeyError(backend) text = file.getvalue() @@ -227,6 +246,7 @@ def load(file, backend='ruamel'): else: if backend == 'ruamel': import ruamel.yaml # NOQA + # TODO: seems like there will be a deprecation # from ruamel.yaml import YAML if NEW_RUAMEL: @@ -236,10 +256,13 @@ def load(file, backend='ruamel'): # yaml = YAML(typ='unsafe', pure=True) 
# data = yaml.load(file, Loader=Loader, preserve_quotes=True) Loader = _custom_ruaml_loader() - data = ruamel.yaml.load(file, Loader=Loader, preserve_quotes=True) + data = ruamel.yaml.load( + file, Loader=Loader, preserve_quotes=True + ) # data = ruamel.yaml.load(file, Loader=ruamel.yaml.RoundTripLoader, preserve_quotes=True) elif backend == 'pyyaml': import yaml + # data = yaml.load(file, Loader=yaml.SafeLoader) data = yaml.load(file, Loader=yaml.Loader) else: @@ -389,7 +412,8 @@ def InlineList(items): .. [SO56937691] https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline """ import ruamel.yaml - ret = ruamel.yaml.comments.CommentedSeq(items) + + ret = ruamel.yaml.comments.CommentedSeq(items) # type: ignore ret.fa.set_flow_style() return ret @@ -408,10 +432,12 @@ def Dict(data): >>> print(Yaml.dumps(data)) """ import ruamel.yaml - ret = ruamel.yaml.comments.CommentedMap(data) + + ret = ruamel.yaml.comments.CommentedMap(data) # type: ignore return ret @staticmethod def CodeBlock(text): import ruamel.yaml - return ruamel.yaml.scalarstring.LiteralScalarString(ub.codeblock(text)) + + return ruamel.yaml.scalarstring.LiteralScalarString(ub.codeblock(text)) # type: ignore diff --git a/dev/_devcheck_rich.py b/dev/_devcheck_rich.py index 8a5127f..3b38891 100644 --- a/dev/_devcheck_rich.py +++ b/dev/_devcheck_rich.py @@ -5,13 +5,16 @@ Cant do this with pure rich https://github.com/Textualize/rich/issues/2120 """ -from rich.table import Table -from rich.live import Live + import time +from rich.live import Live +from rich.table import Table + def random_rich_table(): import random + r = random.random() columns = ['name', 'status', 'finished', 'errors', 'total'] table = Table() @@ -42,6 +45,7 @@ def simple_update_no_pager(): def simple_pager_no_update(): from rich.console import Console + console = Console() table = random_rich_table() with console.pager(): @@ -51,8 +55,8 @@ def simple_pager_no_update(): def combined_scrolling_table(): 
from textual import events from textual.app import App - from textual.widgets import ScrollView from textual.widget import Widget + from textual.widgets import ScrollView class JobTable(Widget): def on_mount(self): @@ -66,10 +70,9 @@ class MyApp(App): """An example of a very simple Textual App""" async def on_load(self, event: events.Load) -> None: - await self.bind("q", "quit", "Quit") + await self.bind('q', 'quit', 'Quit') async def on_mount(self, event: events.Mount) -> None: - self.body = body = ScrollView(auto_width=True) await self.view.dock(body) @@ -80,7 +83,7 @@ async def add_content(): await self.call_later(add_content) - MyApp.run(title="Simple App", log="textual.log") + MyApp.run(title='Simple App', log='textual.log') if __name__ == '__main__': diff --git a/dev/setup_secrets.sh b/dev/setup_secrets.sh index 0a8efc9..ee607ab 100644 --- a/dev/setup_secrets.sh +++ b/dev/setup_secrets.sh @@ -139,6 +139,8 @@ setup_package_environs_github_erotemic(){ export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" + export GITHUB_ENVIRONMENT_PYPI="pypi" + export GITHUB_ENVIRONMENT_TESTPYPI="testpypi" export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh @@ -151,6 +153,8 @@ setup_package_environs_github_pyutils(){ export VARNAME_TWINE_PASSWORD="PYUTILS_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="PYUTILS_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="PYUTILS_PYPI_MASTER_TOKEN_USERNAME" + export GITHUB_ENVIRONMENT_PYPI="pypi" + export GITHUB_ENVIRONMENT_TESTPYPI="testpypi" export VARNAME_TEST_TWINE_USERNAME="PYUTILS_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=PyUtils-CI " ' | python -c "import sys; from 
textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh @@ -162,21 +166,138 @@ setup_package_environs_github_pyutils(){ #' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh } +resolve_secret_value_from_varname_ptr(){ + local secret_varname_ptr="$1" + local secret_name="$2" + local secret_varname="${!secret_varname_ptr}" + if [[ "$secret_varname" == "" ]]; then + echo "Skipping $secret_name because $secret_varname_ptr is unset" >&2 + return 1 + fi + local secret_value="${!secret_varname}" + if [[ "$secret_value" == "" ]]; then + echo "Skipping $secret_name because $secret_varname is unset or empty" >&2 + return 1 + fi + printf '%s' "$secret_value" +} + +upload_one_github_secret(){ + local secret_name="$1" + local secret_value="$2" + local environment_name="${3:-}" + if [[ "$environment_name" == "" ]]; then + gh secret set "$secret_name" -b"$secret_value" + else + gh secret set "$secret_name" --env "$environment_name" -b"$secret_value" + fi +} + +github_repo_full_name(){ + local remote_url + remote_url="$(git remote get-url origin)" + if [[ "$remote_url" == git@github.com:* ]]; then + printf '%s' "${remote_url#git@github.com:}" | sed 's/\.git$//' + elif [[ "$remote_url" == https://github.com/* ]]; then + printf '%s' "${remote_url#https://github.com/}" | sed 's/\.git$//' + else + echo "Unable to determine GitHub repo from origin: $remote_url" >&2 + return 1 + fi +} + +ensure_github_environment(){ + local environment_name="$1" + local repo_full_name + repo_full_name="$(github_repo_full_name)" || return 1 + gh api --method PUT \ + -H "Accept: application/vnd.github+json" \ + "/repos/${repo_full_name}/environments/${environment_name}" >/dev/null +} + +setup_github_release_environments(){ + source dev/secrets_configuration.sh + local repo_full_name + local pypi_env + local testpypi_env + repo_full_name="$(github_repo_full_name)" || return 1 + 
pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + + ensure_github_environment "$testpypi_env" + ensure_github_environment "$pypi_env" + + echo "Ensured GitHub environments exist:" + echo " - $testpypi_env" + echo " - $pypi_env" + echo "Review environment protection rules manually as needed:" + echo " https://github.com/${repo_full_name}/settings/environments" + echo "Suggested policy:" + echo " - ${testpypi_env}: usually no approval required" + echo " - ${pypi_env}: require approval / reviewers and restrict to release refs" +} + upload_github_secrets(){ + local mode="${1:-legacy}" load_secrets unset GITHUB_TOKEN #printf "%s" "$GITHUB_TOKEN" | gh auth login --hostname Github.com --with-token if ! gh auth status ; then gh auth login fi + local secret_value + local pypi_env + local testpypi_env source dev/secrets_configuration.sh - gh secret set "TWINE_USERNAME" -b"${!VARNAME_TWINE_USERNAME}" - gh secret set "TEST_TWINE_USERNAME" -b"${!VARNAME_TEST_TWINE_USERNAME}" - toggle_setx_enter - gh secret set "CI_SECRET" -b"${!VARNAME_CI_SECRET}" - gh secret set "TWINE_PASSWORD" -b"${!VARNAME_TWINE_PASSWORD}" - gh secret set "TEST_TWINE_PASSWORD" -b"${!VARNAME_TEST_TWINE_PASSWORD}" - toggle_setx_exit + + if [[ "$mode" == "trusted_publishing" ]]; then + pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + setup_github_release_environments + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_CI_SECRET CI_SECRET) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "CI_SECRET" "$secret_value" "$pypi_env" + upload_one_github_secret "CI_SECRET" "$secret_value" "$testpypi_env" + fi + toggle_setx_exit + elif [[ "$mode" == "direct_gpg" ]]; then + # direct_ci GPG transport + non-trusted publishing. + # GPG material is already uploaded by upload_github_gpg_secrets. 
+ # Upload Twine credentials environment-scoped (live password to pypi + # env, test password to testpypi env). CI_SECRET is not uploaded. + pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + setup_github_release_environments + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_USERNAME TWINE_USERNAME) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TWINE_USERNAME" "$secret_value" "$pypi_env" + upload_one_github_secret "TWINE_USERNAME" "$secret_value" "$testpypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_USERNAME TEST_TWINE_USERNAME) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TEST_TWINE_USERNAME" "$secret_value" "$testpypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_PASSWORD TWINE_PASSWORD) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TWINE_PASSWORD" "$secret_value" "$pypi_env" + fi + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_PASSWORD TEST_TWINE_PASSWORD) || true + if [[ "$secret_value" != "" ]]; then + upload_one_github_secret "TEST_TWINE_PASSWORD" "$secret_value" "$testpypi_env" + fi + toggle_setx_exit + else + # Legacy mode: all secrets repo-level, CI_SECRET included. 
+ secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_USERNAME TWINE_USERNAME) && upload_one_github_secret "TWINE_USERNAME" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_USERNAME TEST_TWINE_USERNAME) && upload_one_github_secret "TEST_TWINE_USERNAME" "$secret_value" + toggle_setx_enter + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_CI_SECRET CI_SECRET) && upload_one_github_secret "CI_SECRET" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TWINE_PASSWORD TWINE_PASSWORD) && upload_one_github_secret "TWINE_PASSWORD" "$secret_value" + secret_value=$(resolve_secret_value_from_varname_ptr VARNAME_TEST_TWINE_PASSWORD TEST_TWINE_PASSWORD) && upload_one_github_secret "TEST_TWINE_PASSWORD" "$secret_value" + toggle_setx_exit + fi } @@ -224,15 +345,15 @@ upload_gitlab_group_secrets(){ fi TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" < "$TMP_DIR/group_info" jq # Get group-level secret variables - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" > "$TMP_DIR/group_vars" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" > "$TMP_DIR/group_vars" < "$TMP_DIR/group_vars" jq '.[] | .key' if [[ "$?" 
!= "0" ]]; then @@ -260,20 +381,26 @@ upload_gitlab_group_secrets(){ echo "Remove variable does not exist, posting" toggle_setx_enter - curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ - --form "key=${SECRET_VARNAME}" \ - --form "value=${LOCAL_VALUE}" \ - --form "protected=true" \ - --form "masked=true" \ - --form "environment_scope=*" \ - --form "variable_type=env_var" + curl --fail --silent --show-error \ + --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ + --form "key=${SECRET_VARNAME}" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" toggle_setx_exit elif [[ "$REMOTE_VALUE" != "$LOCAL_VALUE" ]]; then echo "Remove variable does not agree, putting" # Update variable value toggle_setx_enter - curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ - --form "value=${LOCAL_VALUE}" + curl --fail --silent --show-error \ + --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" toggle_setx_exit else echo "Remote value agrees with local" @@ -305,13 +432,13 @@ upload_gitlab_repo_secrets(){ TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". 
| map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") < "$TMP_DIR/group_info" jq @@ -321,16 +448,25 @@ upload_gitlab_repo_secrets(){ # Get group-level secret variables toggle_setx_enter - curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" + curl --fail --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" toggle_setx_exit < "$TMP_DIR/project_vars" jq '.[] | .key' if [[ "$?" != "0" ]]; then echo "Failed to access project level variables. Probably a permission issue" fi + local mode="${1:-legacy}" + LIVE_MODE=1 source dev/secrets_configuration.sh - SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + if [[ "$mode" == "direct_gpg" ]]; then + # In direct_ci transport mode the GPG key material is uploaded as + # project-level secrets by upload_gitlab_gpg_secrets; CI_SECRET is not + # needed. Only Twine and push-token secrets are uploaded here. 
+ SECRET_VARNAME_ARR=(VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + else + SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + fi for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" @@ -349,13 +485,16 @@ upload_gitlab_repo_secrets(){ # New variable echo "Remove variable does not exist, posting" if [[ "$LIVE_MODE" == "1" ]]; then - curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" \ - --form "key=${SECRET_VARNAME}" \ - --form "value=${LOCAL_VALUE}" \ - --form "protected=true" \ - --form "masked=true" \ - --form "environment_scope=*" \ - --form "variable_type=env_var" + curl --fail --silent --show-error \ + --request POST \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables" \ + --form "key=${SECRET_VARNAME}" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" else echo "dry run, not posting" fi @@ -363,8 +502,15 @@ upload_gitlab_repo_secrets(){ echo "Remove variable does not agree, putting" # Update variable value if [[ "$LIVE_MODE" == "1" ]]; then - curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables/$SECRET_VARNAME" \ - --form "value=${LOCAL_VALUE}" + curl --fail --silent --show-error \ + --request PUT \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$SECRET_VARNAME" \ + --form "value=${LOCAL_VALUE}" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" else echo "dry run, not putting" fi @@ -393,7 +539,10 @@ 
export_encrypted_code_signing_keys(){ # HOW TO ENCRYPT YOUR SECRET GPG KEY # You need to have a known public gpg key for this to make any sense - MAIN_GPG_KEYID=$(gpg --list-keys --keyid-format LONG "$GPG_IDENTIFIER" | head -n 2 | tail -n 1 | awk '{print $1}') + # Full primary-key fingerprint (40 hex chars) — more collision-resistant + # than the 16-char LONG key ID. Uses machine-parseable colon format so + # the extraction is stable across gpg output layout changes. + MAIN_GPG_FPR=$(gpg --list-keys --with-colons "$GPG_IDENTIFIER" | awk -F: '/^fpr/ { print $10; exit }') GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') # Careful, if you don't have a subkey, requesting it will export more than you want. # Export the main key instead (its better to have subkeys, but this is a lesser evil) @@ -404,7 +553,7 @@ export_encrypted_code_signing_keys(){ # anyway. GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') fi - echo "MAIN_GPG_KEYID = $MAIN_GPG_KEYID" + echo "MAIN_GPG_FPR = $MAIN_GPG_FPR" echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" # Only export the signing secret subkey @@ -418,9 +567,10 @@ export_encrypted_code_signing_keys(){ GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_public_gpg_key.pgp > dev/ci_public_gpg_key.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_secret_gpg_subkeys.pgp > dev/ci_secret_gpg_subkeys.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/gpg_owner_trust > dev/gpg_owner_trust.enc - echo "$MAIN_GPG_KEYID" > dev/public_gpg_key + # Store the full fingerprint as the public signer anchor + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key - # Test decrpyt + # Test decrypt GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass 
env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc @@ -434,7 +584,6 @@ export_encrypted_code_signing_keys(){ rm dev/gpg_owner_trust git status git add dev/*.enc - git add dev/gpg_owner_trust git add dev/public_gpg_key } @@ -444,6 +593,207 @@ export_encrypted_code_signing_keys(){ #} +_gpg_locate_signing_subkey(){ + __doc__=" + Internal helper. Sets MAIN_GPG_FPR and GPG_SIGN_SUBKEY in the caller's + scope. Exits non-zero and prints a diagnostic if either cannot be found. + Requires GPG_IDENTIFIER to already be set. + " + MAIN_GPG_FPR=$(gpg --list-keys --with-colons "$GPG_IDENTIFIER" \ + | awk -F: '/^fpr/ { print $10; exit }') + GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" \ + | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') + if [[ "$GPG_SIGN_SUBKEY" == "" ]]; then + echo "WARNING: no [S] subkey found for $GPG_IDENTIFIER, falling back to [C] key" >&2 + GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" \ + | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') + fi + if [[ -z "$MAIN_GPG_FPR" ]]; then + echo "ERROR: could not determine primary key fingerprint for $GPG_IDENTIFIER" >&2 + return 1 + fi + if [[ -z "$GPG_SIGN_SUBKEY" ]]; then + echo "ERROR: could not find a signing subkey for $GPG_IDENTIFIER" >&2 + return 1 + fi + echo "MAIN_GPG_FPR = $MAIN_GPG_FPR" + echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" +} + + +upload_github_gpg_secrets(){ + __doc__=" + Export GPG signing subkey material and upload it directly to GitHub + Actions as environment-scoped secrets (pypi + testpypi environments). + Also writes dev/public_gpg_key with the full primary key fingerprint + and stages it for commit. 
+ + No .enc files are written to disk or committed to git. + This implements ci_gpg_secret_transport = 'direct_ci' for GitHub. + Call this instead of export_encrypted_code_signing_keys. + " + load_secrets + source dev/secrets_configuration.sh + + local pypi_env="${GITHUB_ENVIRONMENT_PYPI:-pypi}" + local testpypi_env="${GITHUB_ENVIRONMENT_TESTPYPI:-testpypi}" + + _gpg_locate_signing_subkey || return 1 + + local TMP_DIR + TMP_DIR=$(mktemp -d -t gpg-ci-XXXXXXXXXX) + # shellcheck disable=SC2064 + trap "rm -rf '$TMP_DIR'" RETURN + + # Export signing subkey secret material and associated public key + gpg --armor --export-options export-backup \ + --export-secret-subkeys "${GPG_SIGN_SUBKEY}!" > "$TMP_DIR/signing_subkey.pgp" + gpg --armor --export "${GPG_SIGN_SUBKEY}" > "$TMP_DIR/public_key.pgp" + gpg --export-ownertrust > "$TMP_DIR/owner_trust" + + # Single-line base64 for robust secret transport (tr -d '\n' is + # portable across GNU and macOS; avoids -w 0 / -b 0 divergence). + local GPG_SECRET_SIGNING_SUBKEY_B64 GPG_PUBLIC_KEY_B64 GPG_OWNER_TRUST_B64 + GPG_SECRET_SIGNING_SUBKEY_B64=$(base64 < "$TMP_DIR/signing_subkey.pgp" | tr -d '\n') + GPG_PUBLIC_KEY_B64=$(base64 < "$TMP_DIR/public_key.pgp" | tr -d '\n') + GPG_OWNER_TRUST_B64=$(base64 < "$TMP_DIR/owner_trust" | tr -d '\n') + + if [[ -z "$GPG_SECRET_SIGNING_SUBKEY_B64" ]]; then + echo "ERROR: signing subkey export is empty — aborting" >&2 + return 1 + fi + + # Write the public fingerprint anchor to the repo. + # This file is the only GPG artifact committed in direct_ci mode. + mkdir -p dev + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key + git add dev/public_gpg_key + git status + + unload_secrets + + # Ensure deployment environments exist before scoping secrets to them + setup_github_release_environments + + if ! 
gh auth status; then gh auth login; fi + + toggle_setx_enter + for env_name in "$pypi_env" "$testpypi_env"; do + upload_one_github_secret "GPG_SECRET_SIGNING_SUBKEY_B64" \ + "$GPG_SECRET_SIGNING_SUBKEY_B64" "$env_name" + upload_one_github_secret "GPG_PUBLIC_KEY_B64" \ + "$GPG_PUBLIC_KEY_B64" "$env_name" + upload_one_github_secret "GPG_OWNER_TRUST_B64" \ + "$GPG_OWNER_TRUST_B64" "$env_name" + done + toggle_setx_exit +} + + +upload_gitlab_gpg_secrets(){ + __doc__=" + Export GPG signing subkey material and upload it directly to GitLab + CI/CD project variables (protected=true, masked=true). + Also writes dev/public_gpg_key with the full primary key fingerprint + and stages it for commit. + + No .enc files are written to disk or committed to git. + This implements ci_gpg_secret_transport = 'direct_ci' for GitLab. + Call this instead of export_encrypted_code_signing_keys. + " + load_secrets + source dev/secrets_configuration.sh + + _gpg_locate_signing_subkey || return 1 + + local TMP_DIR + TMP_DIR=$(mktemp -d -t gpg-ci-XXXXXXXXXX) + # shellcheck disable=SC2064 + trap "rm -rf '$TMP_DIR'" RETURN + + gpg --armor --export-options export-backup \ + --export-secret-subkeys "${GPG_SIGN_SUBKEY}!" > "$TMP_DIR/signing_subkey.pgp" + gpg --armor --export "${GPG_SIGN_SUBKEY}" > "$TMP_DIR/public_key.pgp" + gpg --export-ownertrust > "$TMP_DIR/owner_trust" + + local GPG_SECRET_SIGNING_SUBKEY_B64 GPG_PUBLIC_KEY_B64 GPG_OWNER_TRUST_B64 + GPG_SECRET_SIGNING_SUBKEY_B64=$(base64 < "$TMP_DIR/signing_subkey.pgp" | tr -d '\n') + GPG_PUBLIC_KEY_B64=$(base64 < "$TMP_DIR/public_key.pgp" | tr -d '\n') + GPG_OWNER_TRUST_B64=$(base64 < "$TMP_DIR/owner_trust" | tr -d '\n') + + if [[ -z "$GPG_SECRET_SIGNING_SUBKEY_B64" ]]; then + echo "ERROR: signing subkey export is empty — aborting" >&2 + return 1 + fi + + # Write the public fingerprint anchor to the repo. 
+ mkdir -p dev + printf '%s\n' "$MAIN_GPG_FPR" > dev/public_gpg_key + git add dev/public_gpg_key + git status + + # Locate the GitLab project via git remote + local REMOTE=origin + local HOST + HOST=https://$(git remote get-url $REMOTE \ + | cut -d "/" -f 1 | cut -d "@" -f 2 | cut -d ":" -f 1) + local PRIVATE_GITLAB_TOKEN + PRIVATE_GITLAB_TOKEN=$(git_token_for "$HOST") + if [[ "$PRIVATE_GITLAB_TOKEN" == "ERROR" ]]; then + echo "ERROR: failed to load GitLab authentication token" >&2 + return 1 + fi + + local PROJECT_PATH + PROJECT_PATH=$(git remote get-url $REMOTE | cut -d ":" -f 2 | sed 's/\.git$//') + local PROJECT_ID + PROJECT_ID=$(curl --fail --show-error --silent --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects?search=$(basename "$PROJECT_PATH")" \ + | jq -r ".[] | select(.path_with_namespace==\"$PROJECT_PATH\") | .id") + if [[ -z "$PROJECT_ID" ]]; then + echo "ERROR: could not determine GitLab project ID for $PROJECT_PATH" >&2 + return 1 + fi + echo "PROJECT_ID = $PROJECT_ID" + + _gitlab_upsert_protected_var(){ + local key="$1" value="$2" + local existing + existing=$(curl -s --show-error --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$key" \ + | jq -r '.key // empty') + if [[ -z "$existing" ]]; then + curl --fail --silent --show-error --request POST \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables" \ + --form "key=$key" \ + --form "value=$value" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" + else + curl --fail --silent --show-error --request PUT \ + --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ + "$HOST/api/v4/projects/$PROJECT_ID/variables/$key" \ + --form "value=$value" \ + --form "protected=true" \ + --form "masked=true" \ + --form "environment_scope=*" \ + --form "variable_type=env_var" + fi + } + + unload_secrets + + toggle_setx_enter + 
_gitlab_upsert_protected_var "GPG_SECRET_SIGNING_SUBKEY_B64" "$GPG_SECRET_SIGNING_SUBKEY_B64" + _gitlab_upsert_protected_var "GPG_PUBLIC_KEY_B64" "$GPG_PUBLIC_KEY_B64" + _gitlab_upsert_protected_var "GPG_OWNER_TRUST_B64" "$GPG_OWNER_TRUST_B64" + toggle_setx_exit +} + + _test_gnu(){ # shellcheck disable=SC2155 export GNUPGHOME=$(mktemp -d -t) diff --git a/docs/source/conf.py b/docs/source/conf.py index 8ca8009..4ce3bea 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -110,12 +110,10 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) - # -- Project information ----------------------------------------------------- +from os.path import dirname, exists, join + import sphinx_rtd_theme -from os.path import exists -from os.path import dirname -from os.path import join def parse_version(fpath): @@ -123,23 +121,27 @@ def parse_version(fpath): Statically parse the version number from a python file """ import ast + if not exists(fpath): raise ValueError('fpath={!r} does not exist'.format(fpath)) with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) + class VersionVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: if getattr(target, 'id', None) == '__version__': self.version = node.value.s + visitor = VersionVisitor() visitor.visit(pt) return visitor.version + project = 'cmd_queue' -copyright = '2026, Kitware Inc. Jon Crall' -author = 'Kitware Inc. 
Jon Crall' +copyright = '2026, Kitware Inc., Jon Crall' +author = 'Kitware Inc., Jon Crall' modname = 'cmd_queue' repo_dpath = dirname(dirname(dirname(__file__))) @@ -182,8 +184,8 @@ def visit_Assign(self, node): napoleon_use_param = False napoleon_use_ivar = True -#autoapi_type = 'python' -#autoapi_dirs = [mod_dpath] +# autoapi_type = 'python' +# autoapi_dirs = [mod_dpath] autodoc_inherit_docstrings = False @@ -198,7 +200,8 @@ def visit_Assign(self, node): ] autodoc_default_options = { # Document callable classes - 'special-members': '__call__'} + 'special-members': '__call__' +} autodoc_member_order = 'bysource' autoclass_content = 'both' @@ -233,16 +236,13 @@ def visit_Assign(self, node): 'networkx': ('https://networkx.org/documentation/stable/', None), 'scriptconfig': ('https://scriptconfig.readthedocs.io/en/latest/', None), 'rich': ('https://rich.readthedocs.io/en/latest/', None), - 'numpy': ('https://numpy.org/doc/stable/', None), 'sympy': ('https://docs.sympy.org/latest/', None), 'scikit-learn': ('https://scikit-learn.org/stable/', None), 'pandas': ('https://pandas.pydata.org/docs/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), - 'pytest': ('https://docs.pytest.org/en/latest/', None), 'platformdirs': ('https://platformdirs.readthedocs.io/en/latest/', None), - 'timerit': ('https://timerit.readthedocs.io/en/latest/', None), 'progiter': ('https://progiter.readthedocs.io/en/latest/', None), 'dateutil': ('https://dateutil.readthedocs.io/en/latest/', None), @@ -358,15 +358,12 @@ def visit_Assign(self, node): # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -376,8 +373,13 @@ def visit_Assign(self, node): # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'cmd_queue.tex', 'cmd_queue Documentation', - 'Kitware Inc. Jon Crall', 'manual'), + ( + master_doc, + 'cmd_queue.tex', + 'cmd_queue Documentation', + 'Kitware Inc., Jon Crall', + 'manual', + ), ] @@ -385,10 +387,7 @@ def visit_Assign(self, node): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'cmd_queue', 'cmd_queue Documentation', - [author], 1) -] +man_pages = [(master_doc, 'cmd_queue', 'cmd_queue Documentation', [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -397,14 +396,21 @@ def visit_Assign(self, node): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'cmd_queue', 'cmd_queue Documentation', - author, 'cmd_queue', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + 'cmd_queue', + 'cmd_queue Documentation', + author, + 'cmd_queue', + 'One line description of project.', + 'Miscellaneous', + ), ] # -- Extension configuration ------------------------------------------------- from sphinx.domains.python import PythonDomain # NOQA + # from sphinx.application import Sphinx # NOQA from typing import Any, List # NOQA @@ -414,6 +420,7 @@ def visit_Assign(self, node): MAX_TIME_MINUTES = None if MAX_TIME_MINUTES: import ubelt # NOQA + TIMER = ubelt.Timer() TIMER.tic() @@ -423,7 +430,10 @@ class PatchedPythonDomain(PythonDomain): References: https://github.com/sphinx-doc/sphinx/issues/3866 """ - def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): + + def resolve_xref( + self, env, fromdocname, builder, type, target, node, contnode + ): """ Helps to resolves cross-references """ @@ -432,7 +442,8 @@ def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): if target.startswith('xdoc.'): target = 'xdoctest.' 
+ target[3] return_value = super(PatchedPythonDomain, self).resolve_xref( - env, fromdocname, builder, typ, target, node, contnode) + env, fromdocname, builder, type, target, node, contnode + ) return return_value @@ -460,6 +471,7 @@ def register_section(self, tag, alias=None): alias = [alias] if not isinstance(alias, (list, tuple, set)) else alias alias.append(tag) alias = tuple(alias) + # TODO: better tag patterns def _wrap(func): self.registry[tag] = { @@ -468,6 +480,7 @@ def _wrap(func): 'func': func, } return func + return _wrap def _register_builtins(self): @@ -485,9 +498,12 @@ def commandline(lines): new_lines.extend(lines[1:]) return new_lines - @self.register_section(tag='SpecialExample', alias=['Benchmark', 'Sympy', 'Doctest']) + @self.register_section( + tag='SpecialExample', alias=['Benchmark', 'Sympy', 'Doctest'] + ) def benchmark(lines): import textwrap + new_lines = [] tag = lines[0].replace(':', '').strip() # new_lines.append(lines[0]) # TODO: it would be nice to change the tagline. 
@@ -560,7 +576,7 @@ def process(self, lines): accum = [] def accept(): - """ called when we finish reading a section """ + """called when we finish reading a section""" if curr_mode == '__doc__': # Keep the lines as-is new_lines.extend(accum) @@ -574,7 +590,6 @@ def accept(): accum[:] = [] for line in orig_lines: - found = None for regitem in self.registry.values(): if line.startswith(regitem['alias']): @@ -604,8 +619,15 @@ def accept(): return lines - def process_docstring_callback(self, app, what_: str, name: str, obj: Any, - options: Any, lines: List[str]) -> None: + def process_docstring_callback( + self, + app, + what_: str, + name: str, + obj: Any, + options: Any, + lines: List[str], + ) -> None: """ Callback to be registered to autodoc-process-docstring @@ -634,7 +656,9 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html """ if self.debug: - print(f'ProcessDocstring: name={name}, what_={what_}, num_lines={len(lines)}') + print( + f'ProcessDocstring: name={name}, what_={what_}, num_lines={len(lines)}' + ) # print('BEFORE:') # import ubelt as ub @@ -666,9 +690,9 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, FIX_EXAMPLE_FORMATTING = 1 if FIX_EXAMPLE_FORMATTING: for idx, line in enumerate(lines): - if line == "Example:": - lines[idx] = "**Example:**" - lines.insert(idx + 1, "") + if line == 'Example:': + lines[idx] = '**Example:**' + lines.insert(idx + 1, '') REFORMAT_SECTIONS = 0 if REFORMAT_SECTIONS: @@ -710,7 +734,7 @@ def process_docstring_callback(self, app, what_: str, name: str, obj: Any, text = found['text'] new_lines = [] for para in text.split('\n\n'): - indent = para[:len(para) - len(para.lstrip())] + indent = para[: len(para) - len(para.lstrip())] new_paragraph = indent + paragraph(para) new_lines.append(new_paragraph) new_lines.append('') @@ -729,11 +753,13 @@ class SphinxDocstring: """ Helper to parse and modify sphinx 
docstrings """ + def __init__(docstr, lines): docstr.lines = lines # FORMAT THE RETURNS SECTION A BIT NICER import re + tag_pat = re.compile(r'^:(\w*):') directive_pat = re.compile(r'^.. (\w*)::\s*(\w*)') @@ -744,16 +770,22 @@ def __init__(docstr, lines): directive_match = directive_pat.search(line) if tag_match: tag = tag_match.groups()[0] - sphinx_parts.append({ - 'tag': tag, 'start_offset': idx, - 'type': 'tag', - }) + sphinx_parts.append( + { + 'tag': tag, + 'start_offset': idx, + 'type': 'tag', + } + ) elif directive_match: tag = directive_match.groups()[0] - sphinx_parts.append({ - 'tag': tag, 'start_offset': idx, - 'type': 'directive', - }) + sphinx_parts.append( + { + 'tag': tag, + 'start_offset': idx, + 'type': 'directive', + } + ) prev_offset = len(lines) for part in sphinx_parts[::-1]: @@ -793,6 +825,7 @@ def paragraph(text): str: the reduced text block """ import re + out = re.sub(r'\s\s*', ' ', text).strip() return out @@ -802,9 +835,12 @@ def create_doctest_figure(app, obj, name, lines): The idea is that each doctest that produces a figure should generate that and then that figure should be part of the docs. 
""" - import xdoctest import sys import types + + import xdoctest + import xdoctest.core + if isinstance(obj, types.ModuleType): module = obj else: @@ -818,14 +854,15 @@ def create_doctest_figure(app, obj, name, lines): # print(doctest.format_src()) import pathlib + # HACK: write to the srcdir doc_outdir = pathlib.Path(app.outdir) doc_srcdir = pathlib.Path(app.srcdir) doc_static_outdir = doc_outdir / '_static' doc_static_srcdir = doc_srcdir / '_static' - src_fig_dpath = (doc_static_srcdir / 'images') + src_fig_dpath = doc_static_srcdir / 'images' src_fig_dpath.mkdir(exist_ok=True, parents=True) - out_fig_dpath = (doc_static_outdir / 'images') + out_fig_dpath = doc_static_outdir / 'images' out_fig_dpath.mkdir(exist_ok=True, parents=True) # fig_dpath = (doc_outdir / 'autofigs' / name).mkdir(exist_ok=True) @@ -833,6 +870,7 @@ def create_doctest_figure(app, obj, name, lines): fig_num = 1 import kwplot + kwplot.autompl(force='agg') plt = kwplot.autoplt() @@ -843,7 +881,10 @@ def create_doctest_figure(app, obj, name, lines): # so we can get different figures. But we can hack it for now. import re - split_parts = re.split('({}\\s*\n)'.format(re.escape('.. rubric:: Example')), docstr) + + split_parts = re.split( + '({}\\s*\n)'.format(re.escape('.. rubric:: Example')), docstr + ) # split_parts = docstr.split('.. 
rubric:: Example') # import xdev @@ -853,7 +894,9 @@ def doctest_line_offsets(doctest): # Where the doctests starts and ends relative to the file start_line_offset = doctest.lineno - 1 last_part = doctest._parts[-1] - last_line_offset = start_line_offset + last_part.line_offset + last_part.n_lines - 1 + last_line_offset = ( + start_line_offset + last_part.line_offset + last_part.n_lines - 1 + ) offsets = { 'start': start_line_offset, 'end': last_line_offset, @@ -870,10 +913,14 @@ def doctest_line_offsets(doctest): for part in split_parts: num_lines = part.count('\n') - doctests = list(xdoctest.core.parse_docstr_examples( - part, modpath=modpath, callname=name, - # style='google' - )) + doctests = list( + xdoctest.core.parse_docstr_examples( + part, + modpath=modpath, + callname=name, + # style='google' + ) + ) # print(doctests) # doctests = list(xdoctest.core.parse_docstr_examples( @@ -894,6 +941,7 @@ def doctest_line_offsets(doctest): # Define dummy skipped exception if pytest is not available class Skipped(Exception): pass + try: doctest.mode = 'native' doctest.run(verbose=0, on_error='raise') @@ -913,19 +961,23 @@ class Skipped(Exception): fig_num += 1 # path_name = path_sanatize(name) path_name = (name).replace('.', '_') - fig_fpath = src_fig_dpath / f'fig_{path_name}_{fig_num:03d}.jpeg' + fig_fpath = ( + src_fig_dpath / f'fig_{path_name}_{fig_num:03d}.jpeg' + ) fig.savefig(fig_fpath) print(f'Wrote figure: {fig_fpath}') - to_insert_fpaths.append({ - 'insert_line_index': insert_line_index, - 'fpath': fig_fpath, - }) + to_insert_fpaths.append( + { + 'insert_line_index': insert_line_index, + 'fpath': fig_fpath, + } + ) for fig in figures: plt.close(fig) # kwplot.close_figures(figures) - curr_line_offset += (num_lines) + curr_line_offset += num_lines # if len(doctests) > 1: # doctests @@ -938,6 +990,7 @@ class Skipped(Exception): end_index = len(lines) # Reverse order for inserts import shutil + for info in to_insert_fpaths[::-1]: src_abs_fpath = info['fpath'] @@ 
-966,7 +1019,9 @@ class Skipped(Exception): insert_index = end_index else: raise KeyError(INSERT_AT) - lines.insert(insert_index, '.. image:: {}'.format('..' / rel_to_root_fpath)) + lines.insert( + insert_index, '.. image:: {}'.format('..' / rel_to_root_fpath) + ) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_root_fpath)) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_static_fpath)) lines.insert(insert_index, '') @@ -979,8 +1034,10 @@ def postprocess_hyperlinks(app, doctree, docname): "autodoc-process-docstring" event. """ # Your hyperlink postprocessing logic here - from docutils import nodes import pathlib + + from docutils import nodes + for node in doctree.traverse(nodes.reference): if 'refuri' in node.attributes: refuri = node.attributes['refuri'] @@ -989,13 +1046,15 @@ def postprocess_hyperlinks(app, doctree, docname): fpath = pathlib.Path(node.document['source']) parent_dpath = fpath.parent if (parent_dpath / refuri).exists(): - node.attributes['refuri'] = refuri.replace('.rst', '.html') + node.attributes['refuri'] = refuri.replace( + '.rst', '.html' + ) else: raise AssertionError def fix_rst_todo_section(lines): - new_lines = [] + # new_lines = [] for line in lines: ... ... 
@@ -1003,17 +1062,23 @@ def fix_rst_todo_section(lines): def setup(app): import sphinx - app : sphinx.application.Sphinx = app + import sphinx.application + + app: sphinx.application.Sphinx = app app.add_domain(PatchedPythonDomain, override=True) - app.connect("doctree-resolved", postprocess_hyperlinks) + app.connect('doctree-resolved', postprocess_hyperlinks) docstring_processor = GoogleStyleDocstringProcessor() # https://stackoverflow.com/questions/26534184/can-sphinx-ignore-certain-tags-in-python-docstrings - app.connect('autodoc-process-docstring', docstring_processor.process_docstring_callback) + app.connect( + 'autodoc-process-docstring', + docstring_processor.process_docstring_callback, + ) def copy(src, dst): import shutil + print(f'Copy {src} -> {dst}') assert src.exists() if not dst.parent.exists(): @@ -1024,16 +1089,17 @@ def copy(src, dst): HACK_FOR_KWCOCO = 0 if HACK_FOR_KWCOCO: import pathlib + doc_outdir = pathlib.Path(app.outdir) / 'auto' doc_srcdir = pathlib.Path(app.srcdir) / 'auto' mod_dpath = doc_srcdir / '../../../kwcoco' - src_fpath = (mod_dpath / 'coco_schema.json') + src_fpath = mod_dpath / 'coco_schema.json' copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) - src_fpath = (mod_dpath / 'coco_schema_informal.rst') + src_fpath = mod_dpath / 'coco_schema_informal.rst' copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) return app diff --git a/examples/slurm_example.py b/examples/slurm_example.py index db5f9ef..0d34cb2 100644 --- a/examples/slurm_example.py +++ b/examples/slurm_example.py @@ -1,25 +1,36 @@ - - def main(): - import cmd_queue import ubelt as ub - queue = cmd_queue.Queue.create(backend='slurm', partition='project123', - account='user123', ntasks=1) - job1 = queue.submit(ub.codeblock( - ''' + import cmd_queue + + queue = cmd_queue.Queue.create( + backend='slurm', partition='project123', account='user123', ntasks=1 + ) + + job1 = queue.submit( + 
ub.codeblock( + """ command1 --input=foo.txt --output=bar.txt - ''')) + """ + ) + ) - job2 = queue.submit(ub.codeblock( - ''' + job2 = queue.submit( + ub.codeblock( + """ command2 --input=foo.txt --output=baz.txt - ''')) + """ + ) + ) - queue.submit(ub.codeblock( - ''' + queue.submit( + ub.codeblock( + """ command3 --input1=bar.txt --input2=baz.txt --output=buz.txt - '''), depends=[job2, job1]) + """ + ), + depends=[job2, job1], + ) queue.print_commands() diff --git a/examples/tmux_example.py b/examples/tmux_example.py new file mode 100644 index 0000000..cb7784e --- /dev/null +++ b/examples/tmux_example.py @@ -0,0 +1,225 @@ +""" +Demonstrates the ``monitor`` kwarg on the tmux backend. + +Four monitor modes are illustrated: + + * ``monitor='hybrid'`` (default) — the live status table renders in + the current shell *and* a detached ``cmd_queue monitor`` tmux + session is spawned alongside. Press ``[a]`` from the inline UI to + attach (or switch-client) to the tmux session, ``[q]`` to stop + watching. + + * ``monitor='inline'`` — only the in-shell live UI; no tmux session + is spawned. + + * ``monitor='tmux'`` — only the detached tmux session, no inline + UI. Useful when you want the visible status table (and post-run + cleanup) to survive the calling shell closing. + + * ``monitor='none'`` — no live UI; ``run()`` headless-blocks until + jobs finish. Useful in non-interactive scripts. The reattach hint + is still printed so a human can attach via ``cmd_queue monitor``. + +The job DAG has four logical levels and shows meaningful parallel execution. +Each logical job is split into a serial chain of smaller one-second jobs. +This creates more queue jobs while keeping the total runtime roughly the same. 
+ + Level 1 (prep): prep-A prep-B prep-C prep-D (parallel, 5-8s) + Level 2 (process): proc-A proc-B proc-C proc-D (each after one prep, 3-5s) + Level 3 (merge): merge-X (after proc-A + proc-B) + merge-Y (after proc-C + proc-D) (parallel, 3-4s) + Level 4 (finalize): final (after both merges, 2s) + +By default all of the proc jobs are forced to fail so the failure +summary (and dependency-skip cascade) is visible. Pass ``--failures=0`` +for a clean run, or a smaller number for fewer failures. + +CommandLine: + # Default (hybrid): inline monitor in this shell + attachable tmux + # session. Press [a] in the inline UI to jump into the tmux monitor, + # [q] to stop watching (queue keeps running). + python ~/code/cmd_queue/examples/tmux_example.py + + # Inline-only, no side tmux session + python ~/code/cmd_queue/examples/tmux_example.py --mode=inline + + # Spawn the monitor only in a tmux session (no inline view) + python ~/code/cmd_queue/examples/tmux_example.py --mode=tmux + + # Run silently and reattach manually with `cmd_queue monitor <name>` + python ~/code/cmd_queue/examples/tmux_example.py --mode=none + + # Force a clean run (no injected failures) + python ~/code/cmd_queue/examples/tmux_example.py --failures=0 +""" + +import scriptconfig as scfg +import ubelt as ub + + +class TmuxExampleConfig(scfg.DataConfig): + """ + Command-line configuration for the tmux monitor example. + """ + + mode = scfg.Value( + 'hybrid', + help='Where the monitor UI runs.', + choices=['hybrid', 'inline', 'tmux', 'none'], + ) + name = scfg.Value( + 'tmux-example', + help=ub.paragraph( + """ + Queue name; also doubles as the lookup key for `cmd_queue + monitor <name>`. + """ + ), + ) + workers = scfg.Value(4, type=int, help='Number of parallel tmux workers.') + failures = scfg.Value( + 6, + type=int, + help=ub.paragraph( + """ + Number of proc-* logical jobs to force into failure (0-4). + The failures cascade: dependent merge/final jobs are skipped.
+ """ + ), + ) + logs = scfg.Value( + True, + isflag=True, + help=ub.paragraph( + """ + Set to False to disable per-job log capture (default: enabled). + """ + ), + ) + + +def main(): + import cmd_queue + + args = TmuxExampleConfig.cli() + + queue = cmd_queue.Queue.create( + backend='tmux', + size=args.workers, + name=args.name, + ) + + proc_names = ['proc-A', 'proc-B', 'proc-C', 'proc-D'] + fail_set = set(proc_names[: max(0, min(args.failures, len(proc_names)))]) + + submit_kw = {'log': args.logs} + + def submit_sleep_chain(base_name, total_sleep, depends=None, fail=False): + """ + Submit a logical sleep job as a chain of smaller queue jobs. + + This keeps the logical runtime roughly equal to ``total_sleep``, + but gives the tmux monitor more individual jobs to display. + + Example: + ``submit_sleep_chain('prep-A', 5)`` creates: + + prep-A-01 -> prep-A-02 -> prep-A-03 -> prep-A-04 -> prep-A-05 + + Each part sleeps for one second, so the total duration is still + about five seconds, plus a small amount of scheduling overhead. + """ + if total_sleep <= 0: + raise ValueError('total_sleep must be positive') + + prev_depends = list(depends or []) + last_job = None + + for idx in range(total_sleep): + part = idx + 1 + name = f'{base_name}-{part:02d}' + is_final_part = part == total_sleep + + cmd = f'echo "[{name}] start"; sleep 1; ' + + if is_final_part and fail: + cmd += f'echo "[{base_name}] FORCED FAILURE" >&2; exit 1' + elif is_final_part: + cmd += f'echo "[{base_name}] done"' + else: + cmd += f'echo "[{name}] done"' + + last_job = queue.submit( + cmd, + name=name, + depends=prev_depends, + **submit_kw, + ) + prev_depends = [last_job] + + return last_job + + # Level 1: four independent prep jobs — run fully in parallel. + # Each logical prep job is split into a serial chain of smaller jobs. 
+ prep_a = submit_sleep_chain('prep-A', 5) + prep_b = submit_sleep_chain('prep-B', 7) + prep_c = submit_sleep_chain('prep-C', 6) + prep_d = submit_sleep_chain('prep-D', 8) + + # Level 2: each process job depends on exactly one prep job; some + # may be forced to fail by --failures. + proc_a = submit_sleep_chain( + 'proc-A', 3, depends=[prep_a], fail='proc-A' in fail_set + ) + proc_b = submit_sleep_chain( + 'proc-B', 4, depends=[prep_b], fail='proc-B' in fail_set + ) + proc_c = submit_sleep_chain( + 'proc-C', 5, depends=[prep_c], fail='proc-C' in fail_set + ) + proc_d = submit_sleep_chain( + 'proc-D', 3, depends=[prep_d], fail='proc-D' in fail_set + ) + + # Level 3: two merge jobs, each waiting on a pair of proc jobs. + merge_x = submit_sleep_chain('merge-X', 4, depends=[proc_a, proc_b]) + merge_y = submit_sleep_chain('merge-Y', 3, depends=[proc_c, proc_d]) + + # Level 4: single finalize job — the whole pipeline converges here. + submit_sleep_chain('final', 2, depends=[merge_x, merge_y]) + + queue.print_graph() + + if not queue.is_available(): + raise SystemExit('tmux backend not available on this machine') + + print( + f'\nLaunching with monitor={args.mode!r}, workers={args.workers}, ' + f'failures={args.failures}, logs={args.logs}\n' + ) + + result = queue.run( + block=True, + monitor=args.mode, + onfail='kill', + other_session_handler='auto', + ) + + print(f'\nrun() returned: {result}') + if args.mode == 'tmux': + print( + 'The monitor tmux session stayed alive after the workers ' + 'finished so the final status table is visible. 
Reattach ' + 'from any shell with:\n' + f' tmux attach -t cmdq-monitor-{args.name}-...\n' + 'or look it up by queue name with:\n' + f' cmd_queue monitor {args.name}' + ) + + +if __name__ == '__main__': + """ + CommandLine: + python ~/code/cmd_queue/examples/tmux_example.py + """ + main() diff --git a/pyproject.toml b/pyproject.toml index 1f91884..d0ec669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,6 @@ [build-system] requires = [ "setuptools>=41.0.1", "wheel>=0.37.1",] -[tool.mypy] -ignore_missing_imports = true - [tool.xcookie] tags = [ "kitware", "purepy", "gitlab",] mod_name = "cmd_queue" @@ -11,7 +8,7 @@ repo_name = "cmd_queue" author = "Kitware Inc., Jon Crall" author_email = "kitware@kitware.com, jon.crall@kitware.com" description = "The cmd_queue module for a DAG of bash commands" -min_python = "3.9" +min_python = "3.10" url = "https://gitlab.kitware.com/computer-vision/cmd_queue" license = "Apache 2" dev_status = "beta" @@ -40,3 +37,33 @@ skip = ['./docs/build', './*.egg-info', './build', './htmlcov'] count = true quiet-level = 3 ignore-words-list = ['wont', 'cant', 'ANS', 'doesnt', 'arent', 'ans', 'thats', 'datas', 'isnt'] + + +[tool.mypy] +ignore_missing_imports = true +#ignore_errors = true + +[tool.ty.rules] +unused-ignore-comment = "ignore" +unused-type-ignore-comment = "ignore" +unresolved-import = "ignore" + +[tool.ruff] +line-length = 80 +target-version = "py310" + +[tool.ruff.lint] +# Enable Flake8 (E, F) and isort (I) rules. +select = ["E", "F", "I"] +# Ignore specific rules, for example, E501 (line too long) as it's handled by the formatter. 
+ignore = [ + "E501", # line too long + "E402", # Module level import not at top of file +] + +[tool.ruff.format] +quote-style = "single" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = false diff --git a/requirements/tests.txt b/requirements/tests.txt index bf7acfe..a2eb4ae 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -2,15 +2,15 @@ pytest>=8.1.1 ; python_version < '4.0' and python_version >= '3.13' # Python 3.13+ pytest>=8.1.1 ; python_version < '3.13' and python_version >= '3.12' # Python 3.12 pytest>=8.1.1 ; python_version < '3.12' and python_version >= '3.11' # Python 3.11 -pytest>=6.2.5 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 -pytest>=6.2.5 ; python_version < '3.10' and python_version >= '3.8' # Python 3.8-3.9 +pytest>=8.1.1 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +pytest>=8.1.1 ; python_version < '3.10' and python_version >= '3.8' # Python 3.8-3.9 xdoctest >= 1.1.5 pytest-cov>=4.1.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ pytest-cov>=3.0.0 ; python_version < '3.11' # -coverage>=7.0.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ -coverage>=6.1.2 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 -coverage>=6.1.1 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 +coverage>=7.3.0 ; python_version < '4.0' and python_version >= '3.12' # Python 3.12 +coverage>=6.1.1 ; python_version < '3.12' and python_version >= '3.10' # Python 3.10-3.11 +coverage>=5.3.1 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 coverage>=6.1.1 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 diff --git a/run_tests.py b/run_tests.py index d55ab03..9b876c4 100755 --- a/run_tests.py +++ b/run_tests.py @@ -1,17 +1,23 @@ #!/usr/bin/env python if __name__ == '__main__': - import pytest import sys + + import pytest + package_name = 
'cmd_queue' mod_dpath = 'cmd_queue' test_dpath = 'tests' pytest_args = [ - '--cov-config', 'pyproject.toml', - '--cov-report', 'html', - '--cov-report', 'term', + '--cov-config', + 'pyproject.toml', + '--cov-report', + 'html', + '--cov-report', + 'term', '--xdoctest', '--cov=' + package_name, - mod_dpath, test_dpath + mod_dpath, + test_dpath, ] pytest_args = pytest_args + sys.argv[1:] sys.exit(pytest.main(pytest_args)) diff --git a/setup.py b/setup.py index 64c3858..bdbc260 100755 --- a/setup.py +++ b/setup.py @@ -1,18 +1,18 @@ #!/usr/bin/env python # Generated by ~/code/xcookie/xcookie/builders/setup.py # based on part ~/code/xcookie/xcookie/rc/setup.py.in -import sys import re -from os.path import exists, dirname, join -from setuptools import find_packages -from setuptools import setup +import sys +from os.path import dirname, exists, join + +from setuptools import find_packages, setup def parse_version(fpath): """ Statically parse the version number from a python file """ - value = static_parse("__version__", fpath) + value = static_parse('__version__', fpath) return value @@ -23,15 +23,15 @@ def static_parse(varname, fpath): import ast if not exists(fpath): - raise ValueError("fpath={!r} does not exist".format(fpath)) - with open(fpath, "r") as file_: + raise ValueError('fpath={!r} does not exist'.format(fpath)) + with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) class StaticVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: - if getattr(target, "id", None) == varname: + if getattr(target, 'id', None) == varname: try: self.static_value = node.value.value except AttributeError: @@ -44,7 +44,7 @@ def visit_Assign(self, node): except AttributeError: import warnings - value = "Unknown {}".format(varname) + value = 'Unknown {}'.format(varname) warnings.warn(value) return value @@ -57,16 +57,16 @@ def parse_description(): pandoc --from=markdown --to=rst --output=README.rst README.md python -c "import 
setup; print(setup.parse_description())" """ - readme_fpath = join(dirname(__file__), "README.rst") + readme_fpath = join(dirname(__file__), 'README.rst') # This breaks on pip install, so check that it exists. if exists(readme_fpath): - with open(readme_fpath, "r") as f: + with open(readme_fpath, 'r') as f: text = f.read() return text - return "" + return '' -def parse_requirements(fname="requirements.txt", versions=False): +def parse_requirements(fname='requirements.txt', versions=False): """ Parse the package dependencies listed in a requirements file but strips specific versioning information. @@ -85,7 +85,7 @@ def parse_requirements(fname="requirements.txt", versions=False): """ require_fpath = fname - def parse_line(line, dpath=""): + def parse_line(line, dpath=''): """ Parse information from a line in a requirements text file @@ -93,75 +93,77 @@ def parse_line(line, dpath=""): line = '-e git+https://a.com/somedep@sometag#egg=SomeDep' """ # Remove inline comments - comment_pos = line.find(" #") + comment_pos = line.find(' #') if comment_pos > -1: line = line[:comment_pos] - if line.startswith("-r "): + if line.startswith('-r '): # Allow specifying requirements in other files - target = join(dpath, line.split(" ")[1]) + target = join(dpath, line.split(' ')[1]) for info in parse_require_file(target): yield info else: # See: https://www.python.org/dev/peps/pep-0508/ - info = {"line": line} - if line.startswith("-e "): - info["package"] = line.split("#egg=")[1] + info = {'line': line} + if line.startswith('-e '): + info['package'] = line.split('#egg=')[1] else: - if "--find-links" in line: + if '--find-links' in line: # setuptools does not seem to handle find links - line = line.split("--find-links")[0] - if ";" in line: - pkgpart, platpart = line.split(";") + line = line.split('--find-links')[0] + if ';' in line: + pkgpart, platpart = line.split(';') # Handle platform specific dependencies # setuptools.readthedocs.io/en/latest/setuptools.html # 
#declaring-platform-specific-dependencies plat_deps = platpart.strip() - info["platform_deps"] = plat_deps + info['platform_deps'] = plat_deps else: pkgpart = line platpart = None # Remove versioning from the package - pat = "(" + "|".join([">=", "==", ">"]) + ")" + pat = '(' + '|'.join(['>=', '==', '>']) + ')' parts = re.split(pat, pkgpart, maxsplit=1) parts = [p.strip() for p in parts] - info["package"] = parts[0] + info['package'] = parts[0] if len(parts) > 1: op, rest = parts[1:] version = rest # NOQA - info["version"] = (op, version) + info['version'] = (op, version) yield info def parse_require_file(fpath): dpath = dirname(fpath) - with open(fpath, "r") as f: + with open(fpath, 'r') as f: for line in f.readlines(): line = line.strip() - if line and not line.startswith("#"): + if line and not line.startswith('#'): for info in parse_line(line, dpath=dpath): yield info def gen_packages_items(): if exists(require_fpath): for info in parse_require_file(require_fpath): - parts = [info["package"]] - if versions and "version" in info: - if versions == "strict": + parts = [info['package']] + if versions and 'version' in info: + if versions == 'strict': # In strict mode, we pin to the minimum version - if info["version"]: + if info['version']: # Only replace the first >= instance - verstr = "".join(info["version"]).replace(">=", "==", 1) + verstr = ''.join(info['version']).replace( + '>=', '==', 1 + ) parts.append(verstr) else: - parts.extend(info["version"]) - if not sys.version.startswith("3.4"): + parts.extend(info['version']) + if not sys.version.startswith('3.4'): # apparently package_deps are broken in 3.4 - plat_deps = info.get("platform_deps") + plat_deps = info.get('platform_deps') if plat_deps is not None: - parts.append(";" + plat_deps) - item = "".join(parts) + parts.append(';' + plat_deps) + item = ''.join(parts) if item: yield item @@ -199,67 +201,77 @@ def gen_packages_items(): # return requirements -NAME = "cmd_queue" -INIT_PATH = 
"cmd_queue/__init__.py" +NAME = 'cmd_queue' +INIT_PATH = 'cmd_queue/__init__.py' VERSION = parse_version(INIT_PATH) -if __name__ == "__main__": +if __name__ == '__main__': setupkw = {} - setupkw["install_requires"] = parse_requirements( - "requirements/runtime.txt", versions="loose" + setupkw['install_requires'] = parse_requirements( + 'requirements/runtime.txt', versions='loose' ) - setupkw["extras_require"] = { - "all": parse_requirements("requirements.txt", versions="loose"), - "runtime": parse_requirements("requirements/runtime.txt", versions="loose"), - "tests": parse_requirements("requirements/tests.txt", versions="loose"), - "optional": parse_requirements("requirements/optional.txt", versions="loose"), - "airflow": parse_requirements("requirements/airflow.txt", versions="loose"), - "docs": parse_requirements("requirements/docs.txt", versions="loose"), - "linting": parse_requirements("requirements/linting.txt", versions="loose"), - "all-strict": parse_requirements("requirements.txt", versions="strict"), - "runtime-strict": parse_requirements( - "requirements/runtime.txt", versions="strict" + setupkw['extras_require'] = { + 'all': parse_requirements('requirements.txt', versions='loose'), + 'runtime': parse_requirements( + 'requirements/runtime.txt', versions='loose' + ), + 'tests': parse_requirements('requirements/tests.txt', versions='loose'), + 'optional': parse_requirements( + 'requirements/optional.txt', versions='loose' + ), + 'airflow': parse_requirements( + 'requirements/airflow.txt', versions='loose' + ), + 'docs': parse_requirements('requirements/docs.txt', versions='loose'), + 'linting': parse_requirements( + 'requirements/linting.txt', versions='loose' + ), + 'all-strict': parse_requirements('requirements.txt', versions='strict'), + 'runtime-strict': parse_requirements( + 'requirements/runtime.txt', versions='strict' + ), + 'tests-strict': parse_requirements( + 'requirements/tests.txt', versions='strict' + ), + 'optional-strict': parse_requirements( 
+ 'requirements/optional.txt', versions='strict' ), - "tests-strict": parse_requirements("requirements/tests.txt", versions="strict"), - "optional-strict": parse_requirements( - "requirements/optional.txt", versions="strict" + 'airflow-strict': parse_requirements( + 'requirements/airflow.txt', versions='strict' ), - "airflow-strict": parse_requirements( - "requirements/airflow.txt", versions="strict" + 'docs-strict': parse_requirements( + 'requirements/docs.txt', versions='strict' ), - "docs-strict": parse_requirements("requirements/docs.txt", versions="strict"), - "linting-strict": parse_requirements( - "requirements/linting.txt", versions="strict" + 'linting-strict': parse_requirements( + 'requirements/linting.txt', versions='strict' ), } - setupkw["name"] = NAME - setupkw["version"] = VERSION - setupkw["author"] = "Kitware Inc., Jon Crall" - setupkw["author_email"] = "kitware@kitware.com, jon.crall@kitware.com" - setupkw["url"] = "https://gitlab.kitware.com/computer-vision/cmd_queue" - setupkw["description"] = "The cmd_queue module for a DAG of bash commands" - setupkw["long_description"] = parse_description() - setupkw["long_description_content_type"] = "text/x-rst" - setupkw["license"] = "Apache 2" - setupkw["packages"] = find_packages(".") - setupkw["python_requires"] = ">=3.9" - setupkw["classifiers"] = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Utilities", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", + setupkw['name'] = NAME + setupkw['version'] = VERSION + setupkw['author'] = 'Kitware Inc., Jon Crall' + setupkw['author_email'] = 'kitware@kitware.com, jon.crall@kitware.com' + 
setupkw['url'] = 'https://gitlab.kitware.com/computer-vision/cmd_queue' + setupkw['description'] = 'The cmd_queue module for a DAG of bash commands' + setupkw['long_description'] = parse_description() + setupkw['long_description_content_type'] = 'text/x-rst' + setupkw['license'] = 'Apache 2' + setupkw['packages'] = find_packages('.') + setupkw['python_requires'] = '>=3.10' + setupkw['classifiers'] = [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', ] - setupkw["package_data"] = {"": ["requirements/*.txt"]} - setupkw["entry_points"] = { - "console_scripts": [ - "cmd_queue = cmd_queue.__main__:main", + setupkw['package_data'] = {'': ['requirements/*.txt']} + setupkw['entry_points'] = { + 'console_scripts': [ + 'cmd_queue = cmd_queue.__main__:main', ], } setup(**setupkw) diff --git a/tests/test_airflow_queue.py b/tests/test_airflow_queue.py index 33ec628..5f7e854 100644 --- a/tests/test_airflow_queue.py +++ b/tests/test_airflow_queue.py @@ -1,11 +1,10 @@ """Tests for the Airflow backend without pytest fixtures.""" -import ubelt as ub import pytest +import ubelt as ub from cmd_queue.airflow_queue import AirflowQueue - airflow = pytest.importorskip('airflow') @@ -18,7 +17,9 @@ def _test_dpath(name: str) -> ub.Path: def _make_queue(name='cmdq_airflow_demo'): dpath = _test_dpath(name) airflow_home = dpath / 'airflow_home' - return AirflowQueue(name=name, dpath=dpath / 'queue_root', airflow_home=airflow_home) + return AirflowQueue( + name=name, dpath=dpath / 'queue_root', airflow_home=airflow_home + ) def test_finalize_text_contains_dependencies(): @@ -27,7 +28,7 @@ def test_finalize_text_contains_dependencies(): queue.submit('echo second', 
name='second_task', depends=first) text = queue.finalize_text() - assert "dag = DAG(" in text + assert 'dag = DAG(' in text assert "'finalize_demo'" in text assert "jobs['first_task']" in text assert "jobs['second_task']" in text @@ -37,8 +38,8 @@ def test_finalize_text_contains_dependencies(): def test_airflow_queue_run_executes_in_order(): queue = _make_queue(name='run_demo') outfile = queue.dpath / 'output.txt' - queue.submit(f"echo first >> {outfile}", name='first') - queue.submit(f"echo second >> {outfile}", name='second', depends='first') + queue.submit(f'echo first >> {outfile}', name='first') + queue.submit(f'echo second >> {outfile}', name='second', depends='first') queue.run() diff --git a/tests/test_bash_job_errors.py b/tests/test_bash_job_errors.py index 930f33d..6051048 100644 --- a/tests/test_bash_job_errors.py +++ b/tests/test_bash_job_errors.py @@ -2,9 +2,10 @@ def demo_script(dpath): - script_fpath = (dpath / 'myprog.py') - script_fpath.write_text(ub.codeblock( - ''' + script_fpath = dpath / 'myprog.py' + script_fpath.write_text( + ub.codeblock( + """ #!/usr/env/python def main(): @@ -30,34 +31,46 @@ def main(): if __name__ == '__main__': main() - ''')) + """ + ) + ) return script_fpath def test_bash_job_errors(): import ubelt as ub + dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_bash_job_errors') dpath.delete().ensuredir() - from cmd_queue.serial_queue import BashJob # Demo full boilerplate for a job with no dependencies import sys + + from cmd_queue.serial_queue import BashJob + sys.executable script_fpath = demo_script(dpath) pyexe = sys.executable - self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=True) - self.print_commands(1, 1) + self = BashJob( + f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=True + ) + self.print_commands(True, True) - self = BashJob(f'{pyexe} {script_fpath} --failflag --steps=4', 'myjob', log=False) - self.print_commands(1, 1) + self = BashJob( + f'{pyexe} {script_fpath} 
--failflag --steps=4', 'myjob', log=False + ) + self.print_commands(True, True) def test_tmux_queue_errors(): - import ubelt as ub import sys + + import ubelt as ub + import cmd_queue + dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_tmux_queue_errors') dpath.delete().ensuredir() script_fpath = demo_script(dpath) @@ -66,16 +79,29 @@ def test_tmux_queue_errors(): log = True queue = cmd_queue.Queue.create(backend='tmux') - job1 = queue.submit(f'{pyexe} {script_fpath} --steps=3 --steptime=0.5', log=log) - job2 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5 --failflag', log=log, depends=job1) - job3 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log, depends=job2) - job4 = queue.submit(f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log) + job1 = queue.submit( + f'{pyexe} {script_fpath} --steps=3 --steptime=0.5', log=log + ) + job2 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5 --failflag', + log=log, + depends=job1, + ) + job3 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', + log=log, + depends=job2, + ) + job4 = queue.submit( + f'{pyexe} {script_fpath} --steps=2 --steptime=0.5', log=log + ) # queue.submit(f'{pyexe} {script_fpath} --steps=2', log=log) - queue.print_commands(1, 1) + queue.print_commands(True, True) queue.write() if not queue.is_available(): import pytest + pytest.skip('Skip tmux test. Tmux is not available') queue.run(block=0) diff --git a/tests/test_bash_variants.py b/tests/test_bash_variants.py index 8b2d2fc..09563aa 100644 --- a/tests/test_bash_variants.py +++ b/tests/test_bash_variants.py @@ -1,11 +1,14 @@ """ Tests for multiple variants of bash job text construction. 
""" -from cmd_queue.serial_queue import BashJob + import subprocess import tempfile -import ubelt as ub + import kwutil +import ubelt as ub + +from cmd_queue.serial_queue import BashJob def test_primary_bash_job_text_variants(): @@ -14,7 +17,7 @@ def test_primary_bash_job_text_variants(): that makes it easier to manually test common cases. """ main_variants = kwutil.Yaml.coerce( - ''' + """ - __testname__: plain jane cwd: False depends: False @@ -30,19 +33,22 @@ def test_primary_bash_job_text_variants(): log: True with_status: True with_gaurds: True - ''') + """ + ) dep = BashJob('echo hi', name='job1') for variant in main_variants: - job_kwargs = {} if variant['depends']: job_kwargs['depends'] = [dep] if variant['cwd']: job_kwargs['cwd'] = '/foo/bar' if variant['preamble']: - job_kwargs['preamble'] = ['export SETUP_LINE1=1', 'export SETUP_LINE2=2'] + job_kwargs['preamble'] = [ + 'export SETUP_LINE1=1', + 'export SETUP_LINE2=2', + ] finalize_kwargs = ub.udict(variant) & {'with_status', 'with_gaurds'} @@ -59,11 +65,14 @@ def test_primary_bash_job_text_variants(): if variant['__testname__'] == 'plain jane': assert text.strip() == command, ( - 'When there is nothing special, we just return the command as given') + 'When there is nothing special, we just return the command as given' + ) if variant['__testname__'] == 'the works': assert 'pushd "/foo/bar"' in text assert 'popd' in text - assert 'CHDIR_OK' in text, "cwd=True should define CHDIR_OK and guard popd" + assert 'CHDIR_OK' in text, ( + 'cwd=True should define CHDIR_OK and guard popd' + ) assert 'if [[ "$CHDIR_OK" == "1" ]]' in text or 'CHDIR_OK' in text assert 'export SETUP_LINE1=1' in text assert 'export SETUP_LINE2=2' in text @@ -72,27 +81,30 @@ def test_primary_bash_job_text_variants(): def test_bash_job_variants_syntax_grided(): basis = kwutil.Yaml.coerce( - ''' + """ cwd: [True, False] depends: [True, False] preamble: [True, False] log: [True, False] with_status: [True, False] with_gaurds: [True, False] - 
''') + """ + ) grid_variants = list(ub.named_product(**basis)) dep = BashJob('echo hi', name='job1') for variant in grid_variants: - job_kwargs = {} if variant['depends']: job_kwargs['depends'] = [dep] if variant['cwd']: job_kwargs['cwd'] = '/foo/bar' if variant['preamble']: - job_kwargs['preamble'] = ['export SETUP_LINE1=1', 'export SETUP_LINE2=2'] + job_kwargs['preamble'] = [ + 'export SETUP_LINE1=1', + 'export SETUP_LINE2=2', + ] finalize_kwargs = ub.udict(variant) & {'with_status', 'with_gaurds'} @@ -116,12 +128,15 @@ def test_bash_job_variants_syntax_grided(): if proc.returncode == 0: print('Parse check is ok') else: - raise AssertionError(f"bash syntax error: \nSTDERR:\n{proc.stderr}\nSCRIPT:\n{text}") + raise AssertionError( + f'bash syntax error: \nSTDERR:\n{proc.stderr}\nSCRIPT:\n{text}' + ) # --- Plain-jane invariant: if nothing special, should equal command if not any(variant.values()): assert text.strip() == 'echo hi', ( - 'When there is nothing special, we just return the command as given') + 'When there is nothing special, we just return the command as given' + ) # --- Preamble should not be echoed if guards are on (i.e. 
set -x happens after preamble) if variant['preamble']: @@ -132,88 +147,141 @@ def test_bash_job_variants_syntax_grided(): pre_idx = text.find('export SETUP_LINE1=1') x_idx = text.find('set -x') assert pre_idx != -1 and x_idx != -1 and pre_idx < x_idx, ( - 'dont enable echo before preamble') + 'dont enable echo before preamble' + ) # --- Logging behavior if variant['log']: # When log is enabled, we expect tee + pipefail boilerplate - assert 'tee' in text, "log=True should use tee" + assert 'tee' in text, 'log=True should use tee' # Be strict if log_fpath is available on self; otherwise fall back to generic checks if hasattr(self, 'log_fpath'): - assert str(self.log_fpath) in text, "log=True should reference log_fpath" + assert str(self.log_fpath) in text, ( + 'log=True should reference log_fpath' + ) if variant['with_gaurds']: - assert 'set -o pipefail' in text, "log=True should enable pipefail" - assert 'set +o pipefail' in text, "log=True should restore pipefail" + assert 'set -o pipefail' in text, ( + 'log=True should enable pipefail' + ) + assert 'set +o pipefail' in text, ( + 'log=True should restore pipefail' + ) else: # When log is disabled, we should not see pipefail boilerplate - assert 'set -o pipefail' not in text, "log=False should not enable pipefail" - assert 'set +o pipefail' not in text, "log=False should not restore pipefail" + assert 'set -o pipefail' not in text, ( + 'log=False should not enable pipefail' + ) + assert 'set +o pipefail' not in text, ( + 'log=False should not restore pipefail' + ) # tee should not appear unless user command includes it (unlikely in these tests) # If you want to be strict: - assert 'tee ' not in text, "log=False should not insert tee" + assert 'tee ' not in text, 'log=False should not insert tee' # --- Guard behavior: when with_gaurds is enabled, we expect set +e and the brace return-code capture if variant['with_gaurds']: - assert 'set +e' in text, "with_gaurds=True should disable exit-on-error" + assert 'set +e' in 
text, ( + 'with_gaurds=True should disable exit-on-error' + ) # We should enable xtrace somewhere (unless bookkeeper disables it; in your tests it should not) - assert 'set -x' in text, "with_gaurds=True should enable command echo" + assert 'set -x' in text, ( + 'with_gaurds=True should enable command echo' + ) # Return code capture should be hidden inside brace trick - assert '{ RETURN_CODE=$?' in text, "with_gaurds=True should capture RETURN_CODE in brace trick" - assert 'set +x -e' in text, "with_gaurds=True should disable echo and re-enable -e" + assert '{ RETURN_CODE=$?' in text, ( + 'with_gaurds=True should capture RETURN_CODE in brace trick' + ) + assert 'set +x -e' in text, ( + 'with_gaurds=True should disable echo and re-enable -e' + ) # Ensure we don't have a noisy RETURN_CODE=$? line outside the brace trick - bad_lines = [ln for ln in text.splitlines() if ln.strip().startswith('RETURN_CODE=$?')] + bad_lines = [ + ln + for ln in text.splitlines() + if ln.strip().startswith('RETURN_CODE=$?') + ] assert not bad_lines, ( - f"RETURN_CODE capture should be in brace trick, found: {bad_lines}") + f'RETURN_CODE capture should be in brace trick, found: {bad_lines}' + ) else: # If guards are off, we should not see xtrace toggles or the brace trick capture - assert 'set -x' not in text, "with_gaurds=False should not enable xtrace" - assert 'set +x -e' not in text, "with_gaurds=False should not include brace trick toggles" - assert '{ RETURN_CODE=$?' not in text, "with_gaurds=False should not include brace trick capture" + assert 'set -x' not in text, ( + 'with_gaurds=False should not enable xtrace' + ) + assert 'set +x -e' not in text, ( + 'with_gaurds=False should not include brace trick toggles' + ) + assert '{ RETURN_CODE=$?' 
not in text, ( + 'with_gaurds=False should not include brace trick capture' + ) # --- Status behavior if variant['with_status']: - assert 'Mark job as running' in text, "with_status=True should mark job as running" - assert 'Mark job as stopped' in text, "with_status=True should mark job as stopped" - assert 'printf "pass" >' in text, "with_status=True should write pass marker" - assert 'printf "fail" >' in text, "with_status=True should write fail marker" - assert 'stat' in text or 'status' in text, "with_status=True should dump status JSON" + assert 'Mark job as running' in text, ( + 'with_status=True should mark job as running' + ) + assert 'Mark job as stopped' in text, ( + 'with_status=True should mark job as stopped' + ) + assert 'printf "pass" >' in text, ( + 'with_status=True should write pass marker' + ) + assert 'printf "fail" >' in text, ( + 'with_status=True should write fail marker' + ) + assert 'stat' in text or 'status' in text, ( + 'with_status=True should dump status JSON' + ) # Make sure RETURN_CODE is referenced in final status conditional assert '"$RETURN_CODE"' in text or 'RETURN_CODE' in text, ( - "with_status=True should reference RETURN_CODE") + 'with_status=True should reference RETURN_CODE' + ) else: # When status is off, we should not emit pass/fail markers - assert 'printf "pass" >' not in text, "with_status=False should not write pass marker" - assert 'printf "fail" >' not in text, "with_status=False should not write fail marker" + assert 'printf "pass" >' not in text, ( + 'with_status=False should not write pass marker' + ) + assert 'printf "fail" >' not in text, ( + 'with_status=False should not write fail marker' + ) assert 'Mark job as running' not in text assert 'Mark job as stopped' not in text # --- Dependency guard behavior is only emitted when status is on and depends exist if variant['depends'] and variant['with_status']: - assert 'if [ -f' in text, "depends+with_status should emit dependency condition" - assert 
'RETURN_CODE=126' in text, "depends+with_status should set skip RETURN_CODE=126" + assert 'if [ -f' in text, ( + 'depends+with_status should emit dependency condition' + ) + assert 'RETURN_CODE=126' in text, ( + 'depends+with_status should set skip RETURN_CODE=126' + ) else: # Be careful: user command might contain this string, but in these tests it won't. assert 'RETURN_CODE=126' not in text, ( - "no depends or no status: should not insert skip RETURN_CODE") + 'no depends or no status: should not insert skip RETURN_CODE' + ) # --- CWD behavior if variant['cwd']: - assert 'pushd' in text, "cwd=True should use pushd" - assert 'popd' in text, "cwd=True should include popd" + assert 'pushd' in text, 'cwd=True should use pushd' + assert 'popd' in text, 'cwd=True should include popd' else: - assert 'pushd' not in text, "cwd=False should not include pushd" - assert 'popd' not in text, "cwd=False should not include popd" + assert 'pushd' not in text, 'cwd=False should not include pushd' + assert 'popd' not in text, 'cwd=False should not include popd' # --- If we emit internal conditional checks (preamble/cwd), they must be closed properly if variant['cwd'] or variant['preamble']: # If you use a recognizable comment/tag, assert it exists if 'internal condition check' in text: - assert 'fi # internal condition check' in text, "internal if must be closed" + assert 'fi # internal condition check' in text, ( + 'internal if must be closed' + ) else: # Generic safety: at least ensure the count of 'if [[ ' and 'fi' isn't wildly off # (This is loose on purpose to avoid false positives with outer dependency if.) 
assert text.count('if [[ ') <= text.count('fi'), ( - "seems like an internal if may be missing a fi") + 'seems like an internal if may be missing a fi' + ) # --- Optional: ordering sanity when guards+status on # Ensure xtrace starts after "Mark job as running" and stops before "Mark job as stopped" @@ -223,28 +291,28 @@ def test_bash_job_variants_syntax_grided(): stopped_idx = text.find('Mark job as stopped') if running_idx != -1 and x_idx != -1 and stopped_idx != -1: assert running_idx < x_idx < stopped_idx, ( - "xtrace should not include boilerplate status dump; it should wrap the payload") + 'xtrace should not include boilerplate status dump; it should wrap the payload' + ) n_checks = len(grid_variants) print(f'Ran all n_checks={n_checks}') def test_bashjob_exec_preamble_fail(): - with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() # Command would create a file if it ran — use that to detect it was skipped - outfile = tmp_path / "ran.txt" + outfile = tmp_path / 'ran.txt' job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir)) job.preamble = ['false'] # fail-fast preamble job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -258,7 +326,7 @@ def test_bashjob_exec_preamble_fail(): ) assert job.fail_fpath.exists() - assert not outfile.exists(), "command should not run if preamble fails" + assert not outfile.exists(), 'command should not run if preamble fails' status = kwutil.Json.load(job.stat_fpath) assert status['ret'] != 0 @@ -267,26 +335,31 @@ def test_bashjob_exec_preamble_fail(): def 
test_bashjob_exec_depends_met_runs(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() dep = BashJob('echo dep', name='dep_job') - dep.pass_fpath = tmp_path / "dep_job.pass" - dep.fail_fpath = tmp_path / "dep_job.fail" - dep.stat_fpath = tmp_path / "dep_job.status.json" + dep.pass_fpath = tmp_path / 'dep_job.pass' + dep.fail_fpath = tmp_path / 'dep_job.fail' + dep.stat_fpath = tmp_path / 'dep_job.status.json' # Create dependency pass marker dep.pass_fpath.parent.mkdir(parents=True, exist_ok=True) dep.pass_fpath.write_text('pass') - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir), depends=[dep]) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', + name='job2', + cwd=str(workdir), + depends=[dep], + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -300,8 +373,8 @@ def test_bashjob_exec_depends_met_runs(): check=False, ) - assert outfile.exists(), "command should run if dependency is met" - assert job.pass_fpath.exists(), "job should pass" + assert outfile.exists(), 'command should run if dependency is met' + assert job.pass_fpath.exists(), 'job should pass' assert not job.fail_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -311,24 +384,30 @@ def test_bashjob_exec_depends_met_runs(): def test_bashjob_exec_depends_unmet_skips(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' 
workdir.mkdir() dep = BashJob('echo dep', name='dep_job') - dep.pass_fpath = tmp_path / "dep_job.pass" - dep.fail_fpath = tmp_path / "dep_job.fail" - dep.stat_fpath = tmp_path / "dep_job.status.json" + dep.pass_fpath = tmp_path / 'dep_job.pass' + dep.fail_fpath = tmp_path / 'dep_job.fail' + dep.stat_fpath = tmp_path / 'dep_job.status.json' # Do NOT create dep.pass_fpath => dependency unmet - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(workdir), depends=[dep]) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', + name='job2', + cwd=str(workdir), + depends=[dep], + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' + job.skip_fpath = tmp_path / 'job2.skip' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -342,9 +421,15 @@ def test_bashjob_exec_depends_unmet_skips(): check=False, ) - assert not outfile.exists(), "command should not run if dependency is unmet" - # With current semantics, skip sets RETURN_CODE=126, which counts as fail - assert job.fail_fpath.exists(), "skipped job should be marked as fail (ret=126)" + assert not outfile.exists(), ( + 'command should not run if dependency is unmet' + ) + # Skipped jobs (deps unmet, RC=126) write skip_fpath only — they + # are NOT also marked as failed. 
+ assert job.skip_fpath.exists(), 'skipped job should be marked as skip' + assert not job.fail_fpath.exists(), ( + 'skipped job should not be marked as fail' + ) assert not job.pass_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -355,17 +440,19 @@ def test_bashjob_exec_cwd_missing_skips_command(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - missing_dir = tmp_path / "does_not_exist" + missing_dir = tmp_path / 'does_not_exist' assert not missing_dir.exists() - outfile = tmp_path / "ran.txt" - job = BashJob(f'echo ran > "{outfile}"', name='job2', cwd=str(missing_dir)) + outfile = tmp_path / 'ran.txt' + job = BashJob( + f'echo ran > "{outfile}"', name='job2', cwd=str(missing_dir) + ) job.preamble = ['export SETUP_LINE1=1'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -379,8 +466,8 @@ def test_bashjob_exec_cwd_missing_skips_command(): check=False, ) - assert job.fail_fpath.exists(), "missing cwd should mark job as failed" - assert not outfile.exists(), "command should not run if cwd pushd fails" + assert job.fail_fpath.exists(), 'missing cwd should mark job as failed' + assert not outfile.exists(), 'command should not run if cwd pushd fails' assert not job.pass_fpath.exists() status = kwutil.Json.load(job.stat_fpath) @@ -390,17 +477,17 @@ def test_bashjob_exec_cwd_missing_skips_command(): def test_bashjob_exec_happy_path(): with tempfile.TemporaryDirectory() as tmp_path: tmp_path = ub.Path(tmp_path) - workdir = tmp_path / "work" + workdir = tmp_path / 'work' workdir.mkdir() - outfile = tmp_path / "ran.txt" + outfile = tmp_path / 'ran.txt' job = BashJob(f'echo ran 
> "{outfile}"', name='job2', cwd=str(workdir)) job.preamble = ['export SETUP_LINE1=1', 'export SETUP_LINE2=2'] job.log = False - job.stat_fpath = tmp_path / "job2.status.json" - job.pass_fpath = tmp_path / "job2.pass" - job.fail_fpath = tmp_path / "job2.fail" + job.stat_fpath = tmp_path / 'job2.status.json' + job.pass_fpath = tmp_path / 'job2.pass' + job.fail_fpath = tmp_path / 'job2.fail' text = job.finalize_text(with_status=True, with_gaurds=True) subprocess.run(['bash', '-n'], input=text, text=True, check=True) @@ -414,9 +501,9 @@ def test_bashjob_exec_happy_path(): check=False, ) - assert outfile.exists(), "command should run on happy path" - assert job.pass_fpath.exists(), "pass marker should exist" - assert not job.fail_fpath.exists(), "fail marker should not exist" + assert outfile.exists(), 'command should run on happy path' + assert job.pass_fpath.exists(), 'pass marker should exist' + assert not job.fail_fpath.exists(), 'fail marker should not exist' status = kwutil.Json.load(job.stat_fpath) assert status['ret'] == 0 diff --git a/tests/test_cli.py b/tests/test_cli.py index e18d091..8c3389b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,14 +1,14 @@ - def test_cli(): """ Ensure the CLI works as expected """ import ubelt as ub + dpath = ub.Path.appdir('cmd_queue/tests/tests_cli').ensuredir() bash_text = ub.codeblock( - r''' + r""" cmd_queue new testqueue1 cmd_queue submit --jobname "job1" -- testqueue1 \ @@ -20,7 +20,8 @@ def test_cli(): cmd_queue show testqueue1 cmd_queue run testqueue1 --backend=serial - ''') + """ + ) fpath = dpath / 'test_script.sh' fpath.write_text(bash_text) @@ -35,12 +36,13 @@ def test_cli_single_executable(): """ import ubelt as ub + dpath = ub.Path.appdir('cmd_queue/tests/tests_cli').ensuredir() true_exe = ub.find_exe('true') bash_text = ub.codeblock( - fr''' + rf""" cmd_queue new testqueue2 cmd_queue submit --jobname "job1" -- testqueue2 \ @@ -52,7 +54,8 @@ def test_cli_single_executable(): cmd_queue show testqueue2 
cmd_queue run testqueue2 --backend=serial - ''') + """ + ) fpath = dpath / 'test_script.sh' fpath.write_text(bash_text) diff --git a/tests/test_errors.py b/tests/test_errors.py index 4b9316f..cb2d1c3 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,7 +1,7 @@ - def test_failures_on_each_backend(): # Test case where a job fails import cmd_queue + backends = cmd_queue.Queue.available_backends() for backend in backends: self = cmd_queue.Queue.create(backend=backend) diff --git a/tests/test_import.py b/tests/test_import.py index 086338f..46d88b6 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,3 +1,4 @@ def test_import(): import cmd_queue + print(f'cmd_queue={cmd_queue}') diff --git a/tests/test_slurm_variants.py b/tests/test_slurm_variants.py index 85852ef..2baf9b8 100644 --- a/tests/test_slurm_variants.py +++ b/tests/test_slurm_variants.py @@ -1,6 +1,7 @@ # test_slurm_preamble_insertion.py import shlex -from cmd_queue.slurm_queue import SlurmQueue, SlurmJob + +from cmd_queue.slurm_queue import SlurmJob, SlurmQueue def _extract_wrap_payload(sbatch_args): @@ -9,12 +10,14 @@ def _extract_wrap_payload(sbatch_args): extract the string passed to --wrap and unquote it. 
""" # sbatch_args contains an entry like: '--wrap \'\'' - wrap_items = [item for item in sbatch_args if item.startswith("--wrap ")] - assert len(wrap_items) == 1, f"Expected exactly one --wrap item, got: {wrap_items}" + wrap_items = [item for item in sbatch_args if item.startswith('--wrap ')] + assert len(wrap_items) == 1, ( + f'Expected exactly one --wrap item, got: {wrap_items}' + ) wrap_item = wrap_items[0] # split once: "--wrap " - _, quoted_payload = wrap_item.split(" ", 1) + _, quoted_payload = wrap_item.split(' ', 1) # The payload is shlex.quote(...)'d in the implementation payload = shlex.split(quoted_payload)[0] @@ -23,12 +26,12 @@ def _extract_wrap_payload(sbatch_args): def test_slurm_wrap_contains_global_then_job_preamble_then_command(): # Global preamble: specified on the queue - global_preamble = ["echo GLOBAL1", "echo GLOBAL2"] + global_preamble = ['echo GLOBAL1', 'echo GLOBAL2'] # Job preamble: specified per submit - job_preamble = "echo JOB1" + job_preamble = 'echo JOB1' - command = "echo RUN" + command = 'echo RUN' queue = SlurmQueue(preamble=global_preamble) @@ -42,31 +45,31 @@ def test_slurm_wrap_contains_global_then_job_preamble_then_command(): payload = _extract_wrap_payload(sbatch_args) # The payload should be a single shell line with && joining - expected = " && ".join(global_preamble + [job_preamble, command]) + expected = ' && '.join(global_preamble + [job_preamble, command]) # Exact match is reasonable here because payload construction is deterministic - assert payload == expected, f"\nExpected:\n{expected}\nGot:\n{payload}" + assert payload == expected, f'\nExpected:\n{expected}\nGot:\n{payload}' def test_slurm_wrap_omits_missing_preambles(): # No global preamble, no job preamble queue = SlurmQueue(preamble=None) - job = queue.submit("echo ONLYCMD", preamble=None) + job = queue.submit('echo ONLYCMD', preamble=None) sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - 
assert payload == "echo ONLYCMD" + assert payload == 'echo ONLYCMD' # Global preamble only - queue = SlurmQueue(preamble=["echo GLOBAL"]) - job = queue.submit("echo CMD", preamble=None) + queue = SlurmQueue(preamble=['echo GLOBAL']) + job = queue.submit('echo CMD', preamble=None) sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - assert payload == "echo GLOBAL && echo CMD" + assert payload == 'echo GLOBAL && echo CMD' # Job preamble only queue = SlurmQueue(preamble=None) - job = queue.submit("echo CMD", preamble="echo JOB") + job = queue.submit('echo CMD', preamble='echo JOB') sbatch_args = job._build_sbatch_args(global_preamble=queue.header_commands) payload = _extract_wrap_payload(sbatch_args) - assert payload == "echo JOB && echo CMD" + assert payload == 'echo JOB && echo CMD' diff --git a/tests/test_submit_log_flag.py b/tests/test_submit_log_flag.py new file mode 100644 index 0000000..1658824 --- /dev/null +++ b/tests/test_submit_log_flag.py @@ -0,0 +1,83 @@ +""" +Test that ``Queue.submit(..., log=True)`` plumbs the flag through to the +underlying ``BashJob`` and that the finalized script tees the command. + +This is the integration boundary: ``test_bash_variants.py`` covers +``BashJob`` directly (i.e. the renderer), but downstream callers +(kwdagger and others) reach ``BashJob`` only via ``Queue.submit(...)``. +A regression where ``submit`` drops or shadows the ``log`` kwarg would +silently disable tee logging without any other test catching it, which +is exactly the kind of thing this test is here to catch. +""" +import cmd_queue + + +def _command_section(text: str) -> str: + """Return the slice of ``text`` between the ``# command:`` marker and + the ``# after_command:`` marker. Lets the assertion focus on the + actual job command and not bookkeeping lines that may also contain + paths the test doesn't care about. 
+ """ + start = text.find('# command:') + end = text.find('# after_command:') + if start == -1 or end == -1: + return text + return text[start:end] + + +def test_submit_with_log_true_produces_tee(): + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-true', size=1) + job = queue.submit('echo hi', name='job1', log=True) + + assert job.log is True, 'log=True should land on BashJob.log' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + + assert '| tee ' in cmd, ( + 'Queue.submit(log=True) should produce a tee in the rendered ' + 'command section. Got:\n' + cmd + ) + assert str(job.log_fpath) in cmd, ( + 'Tee target must be the BashJob.log_fpath so log inspection ' + 'tools find it. Got:\n' + cmd + ) + + +def test_submit_with_log_false_omits_tee(): + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-false', size=1) + job = queue.submit('echo hi', name='job1', log=False) + + assert job.log is False, 'log=False should land on BashJob.log' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + + assert '| tee ' not in cmd, ( + 'Queue.submit(log=False) must NOT add a tee to the command. ' + 'Got:\n' + cmd + ) + + +def test_submit_log_default_omits_tee(): + """The current ``BashJob`` default is ``log=False`` for backward + compatibility. If a caller does not pass ``log``, no tee should + appear. Tracked here so any default flip is caught explicitly. + """ + queue = cmd_queue.Queue.create(backend='serial', name='log-flag-default', size=1) + job = queue.submit('echo hi', name='job1') + + assert job.log is False, 'BashJob.log default is False' + + text = job.finalize_text(with_status=True, with_gaurds=True) + cmd = _command_section(text) + assert '| tee ' not in cmd, ( + 'Default Queue.submit (no log kwarg) must NOT tee. 
Got:\n' + cmd + ) + + +if __name__ == '__main__': + test_submit_with_log_true_produces_tee() + test_submit_with_log_false_omits_tee() + test_submit_log_default_omits_tee() + print('All submit log-flag tests passed.') diff --git a/tests/test_tmux_attach.py b/tests/test_tmux_attach.py new file mode 100644 index 0000000..9dd168a --- /dev/null +++ b/tests/test_tmux_attach.py @@ -0,0 +1,242 @@ +""" +Tests for the ``monitor`` argument on ``Queue.run()`` for the tmux and +slurm backends — specifically the new ``'hybrid'`` mode. + +The full end-to-end behavior (rich.Live + cbreak + tmux attach) requires +an interactive TTY and a live tmux server, so these tests stay at the +plumbing layer: + +* ``monitor='hybrid'`` (the default) on an inline-monitor run spawns the + side ``cmd_queue monitor`` tmux session and tears it down afterwards. +* ``monitor='inline'`` leaves the existing inline-only behavior intact + (no side spawn). +* ``monitor='hybrid'`` falls back gracefully when tmux is missing. +* The renderable hint and the textual app expose the right keybinding. + +The tmux helpers are monkeypatched so the tests run without a tmux server. +""" +from __future__ import annotations + +from typing import Any, Dict, List + +import pytest + + +def _patch_tmux_helpers(monkeypatch: pytest.MonkeyPatch) -> Dict[str, List[Any]]: + """Replace the tmux helper static methods with recorders. + + Returns a dict of call-log lists keyed by helper name so each test + can assert on what the run() path triggered. 
+ """ + from cmd_queue.util import util_tmux + + calls: Dict[str, List[Any]] = { + 'spawn': [], + 'kill': [], + 'has': [], + 'attach_or_switch': [], + } + + def fake_spawn( + session_name: str, + manifest_path: Any, + attach: bool = True, + verbose: int = 0, + extra_args: Any = None, + ) -> Dict[str, Any]: + calls['spawn'].append( + { + 'session_name': session_name, + 'manifest_path': str(manifest_path), + 'attach': attach, + 'extra_args': list(extra_args or []), + } + ) + return {'session_name': session_name, 'attach_command': 'noop'} + + def fake_kill(session_name: str, verbose: int = 3) -> None: + calls['kill'].append(session_name) + + def fake_has(session_name: str) -> bool: + calls['has'].append(session_name) + # Pretend the session exists between spawn and kill so the + # finally-clause actually exercises the kill path. + return True + + def fake_attach(session_name: str) -> None: + calls['attach_or_switch'].append(session_name) + + monkeypatch.setattr( + util_tmux.tmux, 'spawn_monitor_session', staticmethod(fake_spawn) + ) + monkeypatch.setattr( + util_tmux.tmux, 'kill_session', staticmethod(fake_kill) + ) + monkeypatch.setattr( + util_tmux.tmux, 'has_session', staticmethod(fake_has) + ) + monkeypatch.setattr( + util_tmux.tmux, 'attach_or_switch', staticmethod(fake_attach) + ) + return calls + + +def _make_tmux_queue(tmp_path): + from cmd_queue.tmux_queue import TMUXMultiQueue + + queue = TMUXMultiQueue(size=1, name='tmux-attach-test', dpath=tmp_path) + queue.submit('true') + return queue + + +def test_hybrid_mode_spawns_and_kills_side_session(monkeypatch, tmp_path): + """With ``monitor='hybrid'`` the dispatcher must spawn the side + session before invoking ``self.monitor()`` and kill it afterwards + (so we don't leak tmux sessions per run).""" + calls = _patch_tmux_helpers(monkeypatch) + monkeypatch.setattr('ubelt.find_exe', lambda name: f'/usr/bin/{name}') + queue = _make_tmux_queue(tmp_path) + + monitor_calls: List[Dict[str, Any]] = [] + + def 
fake_monitor(self, **kwargs): + # Record what the dispatcher passed and assert the session was + # already spawned by this point. + monitor_calls.append(kwargs) + return {'status': 'done'} + + monkeypatch.setattr( + 'cmd_queue.tmux_queue.TMUXMultiQueue.monitor', fake_monitor + ) + + queue._dispatch_monitor( + monitor='hybrid', + manifest_path=tmp_path / 'manifest.json', + onfail='kill', + onexit='', + with_textual='auto', + ) + + assert len(calls['spawn']) == 1, 'side session must be spawned exactly once' + spawn = calls['spawn'][0] + assert spawn['session_name'].startswith('cmdq-monitor-') + assert '--onfail=kill' in spawn['extra_args'] + assert spawn['attach'] is False, ( + 'spawn_monitor_session(attach=False) — the inline path takes ' + 'over the foreground separately via the [a] keybind' + ) + + assert len(monitor_calls) == 1 + assert monitor_calls[0]['side_session'] == spawn['session_name'] + + assert calls['kill'] == [spawn['session_name']], ( + 'side session must be killed in the dispatcher finally-clause' + ) + + +def test_inline_mode_does_not_spawn(monkeypatch, tmp_path): + """``monitor='inline'`` is the explicit opt-out: no side session + should be created and ``monitor()`` should be invoked without a + ``side_session`` argument (so the inline UI keeps its old shape).""" + calls = _patch_tmux_helpers(monkeypatch) + monkeypatch.setattr('ubelt.find_exe', lambda name: f'/usr/bin/{name}') + queue = _make_tmux_queue(tmp_path) + + seen: List[Dict[str, Any]] = [] + + def fake_monitor(self, **kwargs): + seen.append(kwargs) + return None + + monkeypatch.setattr( + 'cmd_queue.tmux_queue.TMUXMultiQueue.monitor', fake_monitor + ) + queue._dispatch_monitor( + monitor='inline', + manifest_path=tmp_path / 'manifest.json', + onfail='kill', + onexit='', + with_textual='auto', + ) + + assert calls['spawn'] == [], "inline mode must not spawn a side session" + assert calls['kill'] == [], 'no kill if nothing was spawned' + assert 'side_session' not in seen[0], ( + 'inline 
path goes through the legacy monitor() signature, ' + 'with no side_session kwarg' + ) + + +def test_hybrid_falls_back_when_tmux_missing(monkeypatch, tmp_path): + """If tmux is unavailable, hybrid degrades gracefully to inline- + only (a warning is emitted and ``monitor()`` runs with + ``side_session=None``).""" + calls = _patch_tmux_helpers(monkeypatch) + monkeypatch.setattr('ubelt.find_exe', lambda name: None) + queue = _make_tmux_queue(tmp_path) + + seen: List[Dict[str, Any]] = [] + + def fake_monitor(self, **kwargs): + seen.append(kwargs) + return None + + monkeypatch.setattr( + 'cmd_queue.tmux_queue.TMUXMultiQueue.monitor', fake_monitor + ) + with pytest.warns(UserWarning, match='tmux not found'): + queue._dispatch_monitor( + monitor='hybrid', + manifest_path=tmp_path / 'manifest.json', + onfail='kill', + onexit='', + with_textual='auto', + ) + + assert calls['spawn'] == [], 'no tmux → no side spawn' + assert seen[0]['side_session'] is None + + +def test_attach_hint_renderable_mentions_session(): + """The footer text under the live status table must call out the + keybinding and name the session, otherwise users won't discover the + feature.""" + from cmd_queue.tmux_queue import _attach_hint_renderable + + hint = _attach_hint_renderable('cmdq-monitor-foo') + rendered = hint.plain # rich.Text → strip markup + assert '[a]' in rendered + assert '[q]' in rendered + assert 'cmdq-monitor-foo' in rendered + + +def test_textual_app_binds_a_only_when_attach_session_set(): + """The textual app should only register the 'a' keybind when an + attach session is actually wired up — otherwise the binding would + flag-and-shut-down with nowhere to attach to.""" + pytest.importorskip('textual') + try: + from cmd_queue.monitor_app import CmdQueueMonitorApp + except ImportError: + pytest.skip('textual monitor app is unavailable on this build') + if CmdQueueMonitorApp is None: # gated in tmux_queue.py + pytest.skip('textual monitor app is gated off') + + def table_fn(): + return 
None, True, {} + + app_with = CmdQueueMonitorApp(table_fn, attach_session='cmdq-monitor-x') + app_without = CmdQueueMonitorApp(table_fn) + + assert app_with.attach_session == 'cmdq-monitor-x' + assert app_with.attach_requested is False + assert app_without.attach_session is None + assert hasattr(app_with, 'action_attach_monitor'), ( + 'attach action must exist so the binding has a target' + ) + + +if __name__ == '__main__': + import sys + + sys.exit(pytest.main([__file__, '-v'])) diff --git a/tests/tests_mixed_hardware_tmux.py b/tests/tests_mixed_hardware_tmux.py index e942d50..d79c5d9 100644 --- a/tests/tests_mixed_hardware_tmux.py +++ b/tests/tests_mixed_hardware_tmux.py @@ -5,8 +5,10 @@ def test_mixed_hardware(): tree_jobs. """ - import cmd_queue import ubelt as ub + + import cmd_queue + backend = 'tmux' gres = [0, 1] @@ -14,24 +16,61 @@ def test_mixed_hardware(): dpath = ub.Path.appdir('cmd_queue', 'tests', 'test_mixed_hardware') environ = {} - queue = cmd_queue.Queue.create(backend, name='test_mixed_hardware', - size=2, environ=environ, - dpath=dpath, gres=gres) + queue = cmd_queue.Queue.create( + backend, + name='test_mixed_hardware', + size=2, + environ=environ, + dpath=dpath, + gres=gres, + ) import itertools as it + counter = it.count(0) def submit_tree(queue, need_pred_pxl=True): index = next(counter) if need_pred_pxl: - pred_pxl_job = queue.submit('echo "pred_pxl: $CUDA_VISIBLE_DEVICES"', name=f'pred_pxl_{index}', depends=None, cpus=5, gpus=1) + pred_pxl_job = queue.submit( + 'echo "pred_pxl: $CUDA_VISIBLE_DEVICES"', + name=f'pred_pxl_{index}', + depends=None, + cpus=5, + gpus=1, + ) else: pred_pxl_job = None - queue.submit('echo "eval_pxl: $CUDA_VISIBLE_DEVICES"', name=f'eval_pxl_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "pred_trk: $CUDA_VISIBLE_DEVICES"', name=f'pred_trk_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "eval_trk: $CUDA_VISIBLE_DEVICES"', name=f'eval_trk_{index}', depends=f'pred_trk_{index}', cpus=2) - 
queue.submit('echo "pred_act: $CUDA_VISIBLE_DEVICES"', name=f'pred_act_{index}', depends=pred_pxl_job, cpus=2) - queue.submit('echo "eval_act: $CUDA_VISIBLE_DEVICES"', name=f'eval_act_{index}', depends=f'pred_act_{index}', cpus=2) + queue.submit( + 'echo "eval_pxl: $CUDA_VISIBLE_DEVICES"', + name=f'eval_pxl_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "pred_trk: $CUDA_VISIBLE_DEVICES"', + name=f'pred_trk_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "eval_trk: $CUDA_VISIBLE_DEVICES"', + name=f'eval_trk_{index}', + depends=f'pred_trk_{index}', + cpus=2, + ) + queue.submit( + 'echo "pred_act: $CUDA_VISIBLE_DEVICES"', + name=f'pred_act_{index}', + depends=pred_pxl_job, + cpus=2, + ) + queue.submit( + 'echo "eval_act: $CUDA_VISIBLE_DEVICES"', + name=f'eval_act_{index}', + depends=f'pred_act_{index}', + cpus=2, + ) submit_tree(queue) submit_tree(queue)