Hugging Face transformers 里的模型加载的两种方式的写法:
在线方式:
# Online loading: downloads the model and tokenizer from the Hugging Face Hub
# (cached locally after the first run).
from transformers import AutoModel, AutoTokenizer
from utils import load_model_on_gpus  # NOTE(review): unused in this snippet — presumably for multi-GPU sharding; confirm before keeping
# trust_remote_code=True executes code shipped inside the model repo — only use with repos you trust.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
# .cuda() moves the weights onto the GPU; requires a CUDA-capable device.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
离线方式:
# Offline loading: point from_pretrained at a locally cached snapshot directory
# instead of a Hub model id.
from transformers import AutoModel, AutoTokenizer
from utils import load_model_on_gpus  # NOTE(review): unused in this snippet — presumably for multi-GPU sharding; confirm before keeping

# Fix: inside a raw string (r"..."), "\\" stays as two literal backslashes,
# which made the original an invalid Windows path. A raw string needs only a
# single "\" per separator (and must not end with a bare backslash).
MODEL_PATH = r"E:\chat-glm\huggingface_hub_cache\models--THUDM--chatglm2-6b\snapshots\7fabe56db91e085c9c027f56f1c654d137bdba40"

# Fix: the original tokenizer call was missing trust_remote_code=True and its
# closing parenthesis, so the snippet did not parse.
# trust_remote_code=True executes code shipped inside the model repo — only use with repos you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
# .cuda() moves the weights onto the GPU; requires a CUDA-capable device.
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).cuda()