"""
Utilities for guessing good hyperparameters for fine-tuning.
"""

import json
import math
import struct
from typing import Dict, Tuple

import huggingface_hub
import numpy as np
from transformers import AutoConfig

from .utils.misc_utils import not_none


def _list_param_shapes_from_safetensors_remote(
    repo_id: str,
    revision: str = "main",
    token: str | None = None,
) -> Dict[str, Tuple[int, ...]]:
    """
    Return {param_name: shape_tuple} by reading ONLY the safetensors header(s)
    over HTTP (ranged requests). No full file download.
    """
    fs = huggingface_hub.HfFileSystem(token=token)
    info = huggingface_hub.model_info(repo_id, revision=revision, token=token)

    # Find all .safetensors files (handles sharded checkpoints).
    st_files = [
        s.rfilename
        for s in not_none(info.siblings)
        if s.rfilename.endswith(".safetensors")
    ]
    if not st_files:
        raise FileNotFoundError("No .safetensors files found in this repo.")

    shapes: Dict[str, Tuple[int, ...]] = {}

    for fname in st_files:
        # Open the remote file via fsspec; this performs HTTP range reads under the hood.
        path = f"{repo_id}@{revision}/{fname}"  # HfFileSystem path format
        with fs.open(path, "rb") as f:
            # safetensors spec:
            #   [0:8]            = little-endian u64 header_len
            #   [8:8+header_len] = UTF-8 JSON header
            header_len_bytes = f.read(8)
            assert isinstance(header_len_bytes, bytes)
            if len(header_len_bytes) < 8:
                raise IOError(f"File too small or not safetensors: {fname}")
            (header_len,) = struct.unpack("<Q", header_len_bytes)

            header_bytes = f.read(header_len)
            assert isinstance(header_bytes, bytes)
            if len(header_bytes) < header_len:
                raise IOError(f"Incomplete header read for {fname}")

            header = json.loads(header_bytes.decode("utf-8"))
            # The header maps tensor_name -> {"dtype": ..., "shape": [...], "data_offsets": [start, end]}.
            for name, meta in header.items():
                if name == "__metadata__":  # optional global metadata block
                    continue
                shapes[name] = tuple(meta["shape"])

    return shapes


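# For reference, a minimal sketch of the same header parse for a *local*
# safetensors file. This helper is illustrative (it is not used elsewhere in
# this module) and relies only on the spec layout documented above: an 8-byte
# little-endian u64 header length followed by a UTF-8 JSON header.
def _list_param_shapes_from_safetensors_local(path: str) -> Dict[str, Tuple[int, ...]]:
    with open(path, "rb") as f:
        (header_len,) = struct.unpack("<Q", f.read(8))
        header = json.loads(f.read(header_len).decode("utf-8"))
    return {
        name: tuple(meta["shape"])
        for name, meta in header.items()
        if name != "__metadata__"  # skip the optional global metadata block
    }

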
def get_lora_lr_over_full_finetune_lr(model_name: str, lora_alpha: int = 32) -> float:
    """
    Return the factor by which to scale the full fine-tuning learning rate to get
    the equivalent LoRA learning rate. We previously used a more complicated
    formula, but a constant factor of 10 proved more accurate empirically.
    See LoRA Without Regret (https://thinkingmachines.ai/blog/lora/) for details.
    """
    return 10.0


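# Example of applying the factor above: if 5e-05 is a good full fine-tuning
# learning rate for a model, the equivalent LoRA learning rate would be
# 5e-05 * get_lora_lr_over_full_finetune_lr(model_name) == 5e-04.

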
def _get_hidden_size(model_name: str) -> int:
    if "meta-llama/Llama-3" in model_name:
        # Bypass the HF_TOKEN requirement for Llama-3 models.
        return {
            "meta-llama/Llama-3.2-1B": 2048,
            "meta-llama/Llama-3.2-1B-Instruct": 2048,
            "meta-llama/Llama-3.2-3B": 3072,
            "meta-llama/Llama-3.2-3B-Instruct": 3072,
            "meta-llama/Llama-3.1-8B": 4096,
            "meta-llama/Llama-3.1-8B-Instruct": 4096,
            "meta-llama/Llama-3.1-70B": 8192,
            "meta-llama/Llama-3.3-70B-Instruct": 8192,
        }[model_name]

    if model_name in (
        "deepseek-ai/DeepSeek-V3.1",
        "deepseek-ai/DeepSeek-V3.1-Base",
        "moonshotai/Kimi-K2-Thinking",
    ):
        return 7168

    config = AutoConfig.from_pretrained(model_name)
    return config.hidden_size


def get_lora_param_count(
    model_name: str,
    lora_rank: int = 32,
    detailed: bool = False,
    include_experts: bool = True,
    shared_expert_outer_loras: bool = True,
) -> int | dict[str, int]:
    """
    Get the number of parameters in the LoRA adapter. If detailed=True, return a
    breakdown of expert vs. non-expert parameters instead of a single total.
    """

    dim_sum = 0
    dim_sum_experts = 0
    ignore = ["gate", "embed_tokens", "q_b_proj", "kv_b_proj"]
    if not include_experts:
        ignore.append("experts")

    for name, shape in _list_param_shapes_from_safetensors_remote(model_name).items():
        parts = name.split(".")
        if (
            len(shape) == 2
            and name.endswith(".weight")
            and not any(v in parts for v in ignore)
        ):
            if "experts" not in parts or not shared_expert_outer_loras:
                dim_sum += shape[0] + shape[1]
            else:
                # With shared expert outer LoRAs, each outer dim is counted only
                # once, since it is shared across experts.
                expert_idx = int(parts[parts.index("experts") + 1])
                weight_name = parts[parts.index("experts") + 2]
                assert weight_name in ["gate_proj", "down_proj", "up_proj"], (
                    f"Unexpected expert weight name: {weight_name}"
                )
                intermediate_dim = shape[1] if weight_name == "down_proj" else shape[0]
                outer_dim = shape[0] if weight_name == "down_proj" else shape[1]

                dim_sum_experts += intermediate_dim
                if expert_idx == 0:
                    dim_sum_experts += outer_dim

    non_expert_params = lora_rank * dim_sum
    expert_params = lora_rank * dim_sum_experts

    return (
        (expert_params + non_expert_params)
        if not detailed
        else {
            "expert_params": expert_params,
            "non_expert_params": non_expert_params,
            "total_params": expert_params + non_expert_params,
        }
    )


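# Worked example of the counting rule above: a dense (out_dim, in_dim) weight
# gets an A matrix of shape (lora_rank, in_dim) and a B matrix of shape
# (out_dim, lora_rank), i.e. lora_rank * (out_dim + in_dim) adapter parameters.
# For a hypothetical 4096x4096 projection with lora_rank=32, that is
# 32 * (4096 + 4096) = 262,144 parameters.

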
def get_lr(model_name: str, is_lora: bool = True) -> float:
    """
    Guess a good learning rate for fine-tuning model_name: a base LR, scaled by
    the LoRA multiplier when is_lora=True, then scaled by a model-family-specific
    power of the hidden size.
    """
    base_lr = 5e-05
    lora_multiplier = 10.0

    lr = base_lr * lora_multiplier if is_lora else base_lr
    if "llama" in model_name.lower():
        exponent_model = 0.781
    elif "qwen" in model_name.lower():
        exponent_model = 0.0775
    else:
        raise ValueError(f"Unknown model: {model_name}")
    # TODO: sweep to determine LR multipliers for other models
    lr = lr * (2000 / _get_hidden_size(model_name)) ** exponent_model
    return lr


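# Worked example of the formula in get_lr: for "meta-llama/Llama-3.1-8B"
# (hidden_size 4096) with is_lora=True,
# lr = 5e-05 * 10.0 * (2000 / 4096) ** 0.781 ≈ 2.9e-04.

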
def get_full_finetune_param_count(model_name: str) -> float:
    count = 0
    for shape in _list_param_shapes_from_safetensors_remote(model_name).values():
        count += np.prod(shape)
    return float(count)


def get_full_finetune_lr_multiplier(model_name: str) -> float:
    return 1.0 / math.sqrt(get_full_finetune_param_count(model_name))


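# Example: a model with roughly 8e9 parameters gets a multiplier of about
# 1 / sqrt(8e9) ≈ 1.1e-05, so larger models receive proportionally smaller
# full fine-tuning learning rates.

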
def get_lora_lr_multiplier(model_name: str) -> float:
    """
    Get a model-specific multiplier for the LR when training with LoRA.
    Given two models A and B, and a learning rate LR_A that's known to be
    optimal for A, we can guess an optimal learning rate for B as
        LR_B = LR_A * get_lora_lr_multiplier(B) / get_lora_lr_multiplier(A)
    """
    return get_full_finetune_lr_multiplier(
        model_name
    ) * get_lora_lr_over_full_finetune_lr(model_name)
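

# Example of the transfer rule in the docstring above (hypothetical values):
# if lr_a = 3e-04 is known to be optimal for model A, a guess for model B is
#   lr_b = lr_a * get_lora_lr_multiplier(model_b) / get_lora_lr_multiplier(model_a)
# Since get_lora_lr_over_full_finetune_lr is currently a constant, this ratio
# reduces to sqrt(param_count_a / param_count_b).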