Skip to content

Commit 14e3d19

Browse files
committed
refactor: Update token handling in OpenAICompatibleTinkerServer
- Modified the chat template application to return input IDs when the tokenizer yields a BatchEncoding.
- Improved clarity by separating the encoding logic from the return statement.

These changes enhance the token management process within the OpenAICompatibleTinkerServer class.
1 parent 13f9d3b commit 14e3d19

1 file changed

Lines changed: 9 additions & 7 deletions

File tree

src/art/tinker/server.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pydantic import SkipValidation
2626
import tinker
2727
from tinker.lib.public_interfaces.rest_client import RestClient as TinkerRestClient
28+
from transformers.tokenization_utils_base import BatchEncoding
2829
import uvicorn
2930

3031
from art.tinker.cookbook_v import renderers
@@ -60,14 +61,15 @@ async def prompt_tokens(
6061
messages: list[ChatCompletionMessageParam],
6162
tools: list[ChatCompletionToolUnionParam] | None,
6263
) -> list[int]:
63-
return cast(
64-
list[int],
65-
self._get_renderer(base_model).tokenizer.apply_chat_template(
66-
messages, # type: ignore
67-
tools=tools, # type: ignore
68-
add_generation_prompt=True,
69-
),
64+
encoding = self._get_renderer(base_model).tokenizer.apply_chat_template(
65+
messages, # type: ignore
66+
tools=tools, # type: ignore
67+
add_generation_prompt=True,
7068
)
69+
if isinstance(encoding, BatchEncoding):
70+
return encoding.input_ids
71+
else:
72+
return encoding # type: ignore
7173

7274
async def chat_completion_and_token_discrepancies(
7375
self,

0 commit comments

Comments
 (0)