-
Notifications
You must be signed in to change notification settings - Fork 0
/
custom_llm2.py
68 lines (61 loc) · 2.38 KB
/
custom_llm2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from typing import Optional, List, Mapping, Any
from llama_index.core import SimpleDirectoryReader, SummaryIndex
from llama_index.core.callbacks import CallbackManager
from llama_index.core.llms import (
CustomLLM,
CompletionResponse,
CompletionResponseGen,
LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core import Settings
import os, requests
from dotenv import load_dotenv
load_dotenv()  # load CLOUDFLARE_* secrets from a local .env file into the process env
# Cloudflare Workers AI credentials; os.getenv returns None when a variable is unset,
# which would yield a malformed URL / auth header at request time.
CLOUDFLARE_API_TOKEN = os.getenv("CLOUDFLARE_API_TOKEN")
CLOUDFLARE_ACCOUNT_ID = os.getenv("CLOUDFLARE_ACCOUNT_ID")
# Default Workers AI model slug used as Cloudflare_WorkersAI_LLM.model_name.
model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq"
class Cloudflare_WorkersAI_LLM(CustomLLM):
    """LlamaIndex :class:`CustomLLM` backed by the Cloudflare Workers AI REST API.

    Each completion issues one POST to the account-scoped
    ``/ai/run/{model}`` endpoint, authenticated with the module-level
    ``CLOUDFLARE_API_TOKEN`` / ``CLOUDFLARE_ACCOUNT_ID`` values.
    """

    context_window: int = 4096  # prompt window advertised to LlamaIndex
    num_output: int = 256       # max output tokens advertised to LlamaIndex
    model_name: str = model     # Workers AI model slug; overridable per instance

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    def _run_model(self, prompt: str) -> str:
        """POST *prompt* to Workers AI and return the generated text.

        Raises:
            requests.HTTPError: if the API responds with a non-2xx status.
            KeyError: if the response JSON lacks ``result.response``.
        """
        # Fixes vs. original: use self.model_name (the module constant was
        # hardcoded, silently ignoring a caller-configured model); add a
        # timeout so a hung connection cannot block forever; fail loudly on
        # HTTP errors instead of raising an opaque KeyError below.
        resp = requests.post(
            f"https://api.cloudflare.com/client/v4/accounts/{CLOUDFLARE_ACCOUNT_ID}/ai/run/{self.model_name}",
            headers={"Authorization": f"Bearer {CLOUDFLARE_API_TOKEN}"},
            json={"messages": [
                # {"role": "system", "content": "You are a Helpful AI assistant."},
                {"role": "user", "content": prompt}
            ]},
            timeout=120,
        )
        resp.raise_for_status()
        return str(resp.json()["result"]["response"])

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Return the full completion for *prompt* as a single response."""
        return CompletionResponse(text=self._run_model(prompt))

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        """Yield the completion one character at a time (simulated streaming).

        Workers AI is called once; the full text is then re-emitted per
        character, with ``text`` carrying the accumulated prefix and
        ``delta`` the single new character — same shape as the original.
        """
        full_text = self._run_model(prompt)
        streamed = ""
        for ch in full_text:
            streamed += ch
            yield CompletionResponse(text=streamed, delta=ch)