Skip to content

Commit 1b98c10

Browse files
Merge pull request #199 from mistralai/improve_mistral_inference
Improve mistral inference
2 parents 80bd6a8 + a0e0529 commit 1b98c10

File tree

3 files changed

+6 -2 lines changed

3 files changed

+6 -2 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "mistral_inference"
3-
version = "1.3.0"
3+
version = "1.3.1"
44
description = ""
55
authors = ["bam4d <[email protected]>"]
66
readme = "README.md"

src/mistral_inference/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "1.3.0"
1+
__version__ = "1.3.1"

src/mistral_inference/main.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from mistral_common.protocol.instruct.request import ChatCompletionRequest
1313
from mistral_common.tokens.tokenizers.base import Tokenizer
1414
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
15+
from mistral_common.tokens.tokenizers.tekken import Tekkenizer, SpecialTokenPolicy
1516
from mistral_common.tokens.tokenizers.sentencepiece import is_sentencepiece
1617
from mistral_common.tokens.tokenizers.tekken import is_tekken
1718

@@ -36,6 +37,9 @@ def load_tokenizer(model_path: Path) -> MistralTokenizer:
3637

3738
mistral_tokenizer = MistralTokenizer.from_file(str(model_path / tokenizer[0]))
3839

40+
if isinstance(mistral_tokenizer.instruct_tokenizer.tokenizer, Tekkenizer):
41+
mistral_tokenizer.instruct_tokenizer.tokenizer.special_token_policy = SpecialTokenPolicy.KEEP
42+
3943
logging.info(f"Loaded tokenizer of type {mistral_tokenizer.instruct_tokenizer.__class__}")
4044

4145
return mistral_tokenizer

0 commit comments

Comments (0)