from llama_cpp import Llama
import json


class LlamaAssistant:
    # Supported chat formats include "functionary" and "llama-2".
    def __init__(self, model_path, chat_format="llama-2", n_ctx=0, embedding=True):
        # n_ctx=0 lets llama-cpp-python take the context size from the model;
        # embedding=True is required for create_embedding() to work.
        self.llm = Llama(
            model_path=model_path,
            chat_format=chat_format,
            n_ctx=n_ctx,
            embedding=embedding,
        )

    def pre_messages(self, user_prompt):
        # Prepend the system prompt to the user message.
        system_prompt = {
            "role": "system",
            "content": (
                "A chat between a curious user and an artificial intelligence "
                "assistant. The assistant gives helpful, detailed, and polite "
                "answers to the user's questions. The assistant calls functions "
                "with appropriate input when necessary."
            ),
        }
        return [system_prompt, user_prompt]

    def chat(self, *args, **kwargs):
        messages = kwargs.get('messages')
        max_length = kwargs.get('max_length')
        max_context_length = kwargs.get('max_context_length')  # currently unused
        do_sample = kwargs.get('do_sample')  # currently unused
        top_p = kwargs.get('top_p')
        temperature = kwargs.get('temperature')
        # Each message is expected to be a pydantic-style object; round-trip
        # through JSON to get the plain dicts llama-cpp-python expects.
        messages = [json.loads(m.json()) for m in messages]
        res = self.llm.create_chat_completion(
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_length,
        )
        print('res', res)
        return res

    def run(self, user_prompt, tools, tool_choice):
        user_prompt = {
            "role": "user",
            "content": user_prompt,
        }
        messages = self.pre_messages(user_prompt)
        return self.llm.create_chat_completion(
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
        )

    def embedding(self, texts):
        # Return one embedding vector per input text.
        result = []
        for data in self.llm.create_embedding(texts)["data"]:
            result.append(data["embedding"])
        print(len(result))
        return result
# Example usage:
# assistant = LlamaAssistant(
#     model_path="models/llama/functionary-7b-v1.Q5_K.gguf",
#     chat_format="functionary",
# )
# user_input = "Extract Jason is 25 years old"
# tools = [{
#     "type": "function",
#     "function": {
#         "name": "UserDetail",
#         "parameters": {
#             "type": "object",
#             "title": "UserDetail",
#             "properties": {
#                 "name": {
#                     "title": "Name",
#                     "type": "string"
#                 },
#                 "age": {
#                     "title": "Age",
#                     "type": "integer"
#                 }
#             },
#             "required": ["name", "age"]
#         }
#     }
# }]
# tool_choice = {
#     "type": "function",
#     "function": {
#         "name": "UserDetail"
#     }
# }
# result = assistant.run(user_input, tools, tool_choice)
# print('#result#', result)
# result = assistant.embedding([user_input, user_input])
# print('#result#', result)
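
# A hypothetical sketch (not from the original file): chat() calls m.json() on
# each message, so it appears to expect pydantic-style message objects. A
# minimal stand-in class and call might look like the following; the model
# path and sampling values are placeholders, not tested defaults.
# class Message:
#     def __init__(self, role, content):
#         self.role = role
#         self.content = content
#
#     def json(self):
#         return json.dumps({"role": self.role, "content": self.content})
#
# assistant = LlamaAssistant(model_path="models/llama/functionary-7b-v1.Q5_K.gguf")
# reply = assistant.chat(
#     messages=[Message("user", "Hello!")],
#     temperature=0.7,
#     top_p=0.9,
#     max_length=256,
# )
# print(reply["choices"][0]["message"]["content"])
#
# embedding() returns one vector per input text (requires embedding=True,
# which is the constructor's default here):
# vectors = assistant.embedding(["Hello!", "How are you?"])
# print(len(vectors), len(vectors[0]))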