mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 21:00:08 +06:00
parent
58e5e976e0
commit
48dd89cf55
@ -21,14 +21,19 @@ from peft import LoraConfig
|
||||
from safetensors.torch import load_file, save_file
|
||||
|
||||
from transformers import (
|
||||
AutoProcessor,
|
||||
Phi4MultimodalAudioConfig,
|
||||
Phi4MultimodalConfig,
|
||||
Phi4MultimodalFeatureExtractor,
|
||||
Phi4MultimodalForCausalLM,
|
||||
Phi4MultimodalImageProcessorFast,
|
||||
Phi4MultimodalProcessor,
|
||||
Phi4MultimodalVisionConfig,
|
||||
)
|
||||
|
||||
|
||||
CHAT_TEMPLATE = "{% for message in messages %}{{ '<|' + message['role'] + '|>' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'audio' %}{{ '<|audio|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% endif %}{{ '<|end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
|
||||
|
||||
# fmt: off
|
||||
STATE_DICT_MAPPING = {
|
||||
r"^model.embed_tokens_extend.audio_embed.encoder.encoders.(\d+).feed_forward_(in|out).net.0.linear": r"model.embed_tokens_extend.audio_embed.encoder.encoders.\1.feed_forward_\2.gate_up_proj",
|
||||
@ -163,12 +168,36 @@ def convert_and_write_model(input_dir: str, output_dir: str):
|
||||
|
||||
def convert_and_save_processor(input_dir: str, output_dir: str):
|
||||
"""Convert the processor."""
|
||||
processor = Phi4MultimodalProcessor.from_pretrained(input_dir)
|
||||
del processor.image_processor.auto_map
|
||||
del processor.audio_processor.auto_map
|
||||
processor.chat_template = processor.tokenizer.chat_template
|
||||
processor.tokenizer.extra_special_tokens = {"image_token": "<|endoftext10|>", "audio_token": "<|endoftext11|>"}
|
||||
processor.save_pretrained(output_dir)
|
||||
original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True)
|
||||
original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"}
|
||||
converted_processor = Phi4MultimodalProcessor(
|
||||
tokenizer=original_processor.tokenizer,
|
||||
image_processor=Phi4MultimodalImageProcessorFast(),
|
||||
audio_processor=Phi4MultimodalFeatureExtractor(),
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
)
|
||||
converted_processor.save_pretrained(output_dir)
|
||||
|
||||
# we need to rename a few tokens but tokenizers doesn't allow doing that programatically
|
||||
# To avoid consufion and manual renaming, the below part load and re-saved each json file
|
||||
vocab = json.load(open(f"{output_dir}/vocab.json", "r"))
|
||||
vocab["<|endoftext11|>"] = "<|audio|>"
|
||||
vocab["<|endoftext10|>"] = "<|image|>"
|
||||
json.dump(vocab, open(f"{output_dir}/vocab.json", "w"))
|
||||
|
||||
tokenizer = json.load(open(f"{output_dir}/tokenizer.json", "r"))
|
||||
tokenizer["added_tokens"][1]["content"] = "<|image|>"
|
||||
tokenizer["added_tokens"][2]["content"] = "<|audio|>"
|
||||
tokenizer["model"]["vocab"]["<|audio|>"] = tokenizer["model"]["vocab"]["<|endoftext11|>"]
|
||||
tokenizer["model"]["vocab"]["<|image|>"] = tokenizer["model"]["vocab"]["<|endoftext10|>"]
|
||||
del tokenizer["model"]["vocab"]["<|endoftext11|>"]
|
||||
del tokenizer["model"]["vocab"]["<|endoftext10|>"]
|
||||
json.dump(tokenizer, open(f"{output_dir}/tokenizer.json", "w"))
|
||||
|
||||
tokenizer_config = json.load(open(f"{output_dir}/tokenizer_config.json", "r"))
|
||||
tokenizer_config["added_tokens_decoder"]["200010"]["content"] = "<|image|>"
|
||||
tokenizer_config["added_tokens_decoder"]["200011"]["content"] = "<|audio|>"
|
||||
json.dump(tokenizer_config, open(f"{output_dir}/tokenizer_config.json", "w"))
|
||||
|
||||
|
||||
def extract_adapters_data(input_dir: str, output_dir: str):
|
||||
|
Loading…
Reference in New Issue
Block a user