AlignBench
AlignBench copied to clipboard
chatglm3测试结果差异大
上传网站后得分4.76 我的测试代码: ` from inference.models import api_model import json import requests from transformers import AutoTokenizer, AutoModel #import zhipuai
#zhipuai.api_key = "" # TODO
class chatglm(api_model): def init(self, workers=10): #self.model_name = "chatglm_turbo" # TODO self.temperature = 0.7 self.workers = workers
model_path= "/v1/llm/models/chatglm3-6b/"
self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True, device="cuda")
def get_api_result(self, prompt):
question = prompt["question"]
temperature = prompt.get("temperature", self.temperature)
output, history = self.model.chat(self.tokenizer, question, temperature=temperature, history=[])
# response = zhipuai.model_api.invoke(
# model=self.model_name,
# prompt=single_turn_wrapper(question),
# temperature=temperature
# )
# output = response.get("data").get("choices")[0].get("content")
return output
`
我测出来的结果和你的差不多。可能是官方评测用的 system prompt 有区别,或者 genConfig 里的其他生成参数(如 top_p、max_length 等)做过调优。