Skip to content

Commit af5b486

Browse files
authored
Temporarily downgrade transformers (#596)
* fix apex build * fix pynvml * temporarily revert transformers update
1 parent 7d097a4 commit af5b486

5 files changed

Lines changed: 58 additions & 64 deletions

File tree

pyproject.toml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dependencies = [
1919
plotting = ["matplotlib>=3.10.1", "seaborn>=0.13.2"]
2020

2121
backend = [
22-
"peft>=0.18.0",
22+
"peft>=0.14.0",
2323
"hf-xet>=1.1.0",
2424
"bitsandbytes>=0.45.2",
2525
"unsloth==2026.2.1",
@@ -30,7 +30,7 @@ backend = [
3030
"awscli>=1.38.1",
3131
"setuptools>=78.1.0",
3232
"wandb==0.25.0",
33-
"transformers==5.2.0",
33+
"transformers>=4.55.2,<=4.57.3",
3434
"duckdb>=1.0.0",
3535
"pyarrow>=15.0.0",
3636
"trl==0.20.0",
@@ -65,7 +65,7 @@ tinker = [
6565
"pydantic>=2.12.5",
6666
"tinker>=0.8.1",
6767
"torch>=2.8.0",
68-
"transformers==5.2.0",
68+
"transformers>=4.55.2,<=4.57.3",
6969
"uvicorn>=0.35.0",
7070
"datrie>=0.8.3",
7171
]
@@ -122,15 +122,10 @@ required-version = ">=0.6.15"
122122
# Override numpy to <2.0 for compatibility with megatron-core in the training
123123
# environment. vLLM 0.15.1 pulls opencv-python-headless>=4.13 which wants
124124
# numpy>=2 on Python 3.9+, but megatron-core requires numpy<2.
125-
override-dependencies = [
126-
"transformer-engine>=2.11.0",
127-
"numpy<2",
128-
# Override unsloth's overly strict constraint on transformers — v5.x
129-
# is confirmed working per unsloth February-2026 release notes
130-
"transformers==5.2.0",
131-
]
132-
exclude-dependencies = ["pynvml"]
133-
no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
125+
override-dependencies = ["transformer-engine>=2.11.0", "numpy<2"]
126+
# Keep apex build isolation enabled so uv can inject torch from
127+
# `extra-build-dependencies` during lock/sync on non-GPU client machines.
128+
no-build-isolation-package = ["transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
134129

135130
[tool.uv.extra-build-dependencies]
136131
apex = ["torch>=2.8.0"]
@@ -139,6 +134,11 @@ transformer-engine-torch = ["torch>=2.8.0"]
139134
[tool.uv.extra-build-variables]
140135
apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
141136

137+
[[tool.uv.dependency-metadata]]
138+
name = "apex"
139+
version = "0.1"
140+
requires-dist = ["packaging"]
141+
142142
[tool.ty.environment]
143143
python-version = "3.11"
144144

src/art/__init__.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,9 @@ def __init__(self, **kwargs):
4040
import transformers
4141

4242
try:
43-
from .transformers.patches import (
44-
patch_apply_chat_template,
45-
patch_preprocess_mask_arguments,
46-
)
43+
from .transformers.patches import patch_preprocess_mask_arguments
4744

4845
patch_preprocess_mask_arguments()
49-
patch_apply_chat_template()
5046
except Exception:
5147
pass
5248
except ImportError:

src/art/dev/model.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ class PeftArgs(TypedDict, total=False):
197197

198198
class TrainerArgs(TypedDict, total=False):
199199
output_dir: str | None
200+
overwrite_output_dir: bool
200201
do_train: bool
201202
do_eval: bool
202203
do_predict: bool
@@ -225,6 +226,7 @@ class TrainerArgs(TypedDict, total=False):
225226
log_level: str
226227
log_level_replica: str
227228
log_on_each_node: bool
229+
logging_dir: str | None
228230
logging_strategy: "IntervalStrategy | str"
229231
logging_first_step: bool
230232
logging_steps: float
@@ -241,21 +243,25 @@ class TrainerArgs(TypedDict, total=False):
241243
use_mps_device: bool
242244
seed: int
243245
data_seed: int | None
246+
jit_mode_eval: bool
244247
use_ipex: bool
245248
bf16: bool
246249
fp16: bool
247250
fp16_opt_level: str
251+
half_precision_backend: str
248252
bf16_full_eval: bool
249253
fp16_full_eval: bool
250254
tf32: bool | None
251255
local_rank: int
252256
ddp_backend: str | None
257+
tpu_num_cores: int | None
253258
tpu_metrics_debug: bool
254259
debug: str | list[DebugOption]
255260
dataloader_drop_last: bool
256261
eval_steps: float | None
257262
dataloader_num_workers: int
258263
dataloader_prefetch_factor: int | None
264+
past_index: int
259265
run_name: str | None
260266
disable_tqdm: bool | None
261267
remove_unused_columns: bool | None
@@ -296,8 +302,15 @@ class TrainerArgs(TypedDict, total=False):
296302
include_inputs_for_metrics: bool
297303
include_for_metrics: list[str]
298304
eval_do_concat_batches: bool
305+
fp16_backend: str
306+
push_to_hub_model_id: str | None
307+
push_to_hub_organization: str | None
308+
push_to_hub_token: str | None
309+
mp_parameters: str
299310
auto_find_batch_size: bool
300311
full_determinism: bool
312+
torchdynamo: str | None
313+
ray_scope: str | None
301314
ddp_timeout: int
302315
torch_compile: bool
303316
torch_compile_backend: str | None

src/art/transformers/patches.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import functools
21
from typing import TYPE_CHECKING, Optional, Union
32

43
import torch
54
from transformers import masking_utils
65
from transformers.cache_utils import Cache
76
from transformers.configuration_utils import PretrainedConfig
8-
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
97

108
if TYPE_CHECKING:
119
from torch.nn.attention.flex_attention import BlockMask
@@ -37,19 +35,3 @@ def _patched_preprocess_mask_arguments(
3735

3836
def patch_preprocess_mask_arguments() -> None:
3937
masking_utils._preprocess_mask_arguments = _patched_preprocess_mask_arguments # ty:ignore[invalid-assignment]
40-
41-
42-
def patch_apply_chat_template() -> None:
43-
"""Default return_dict=False in apply_chat_template for transformers v5.
44-
45-
Transformers v5 changed the default from list[int] to BatchEncoding.
46-
This restores the v4 behavior so all call sites get list[int] back.
47-
"""
48-
original = PreTrainedTokenizerBase.apply_chat_template
49-
50-
@functools.wraps(original)
51-
def _patched(self, *args, **kwargs): # type: ignore
52-
kwargs.setdefault("return_dict", False)
53-
return original(self, *args, **kwargs)
54-
55-
PreTrainedTokenizerBase.apply_chat_template = _patched # type: ignore

uv.lock

Lines changed: 32 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)