feat: also retrieve webpage content from search results
Hanssen0 committed Dec 21, 2023
1 parent b9aecae · commit 1ca94c7
Showing 2 changed files with 42 additions and 19 deletions.
BotHandler.py (26 changes: 16 additions & 10 deletions)
@@ -155,7 +155,9 @@ async def send_message_async(
         if response_len == 0 and len(request_response.response_images) == 0:
             request_response.response = messages["empty_message"]
 
-        await _send_prepared_message_async(config, messages, request_response, end, plain_text)
+        await _send_prepared_message_async(
+            config, messages, request_response, end, plain_text
+        )
 
     # Error?
     except Exception as e:
@@ -316,15 +318,19 @@ async def parse_img(img_source: str):
     :return:
     """
     try:
-        res = requests.head(
-            img_source,
-            timeout=10,
-            headers={
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/91.4472.114 Safari/537.36"
-            },
-            allow_redirects=True,
+        loop = asyncio.get_event_loop()
+        res = await loop.run_in_executor(
+            None,
+            lambda: requests.head(
+                img_source,
+                timeout=10,
+                headers={
+                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
+                    "AppleWebKit/537.36 (KHTML, like Gecko) "
+                    "Chrome/91.4472.114 Safari/537.36"
+                },
+                allow_redirects=True,
+            ),
         )
         content_type = res.headers.get("content-type")
         if not content_type.startswith("image"):
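The parse_img change above moves the blocking requests.head call into the default thread-pool executor so the asyncio event loop is not stalled while the HTTP request is in flight. A minimal standalone sketch of that pattern follows; the function name and example URL are illustrative, not part of the commit:

    import asyncio

    import requests


    async def fetch_content_type(url: str) -> str:
        # requests is synchronous, so run it in the default thread pool
        # executor instead of blocking the event loop directly.
        loop = asyncio.get_event_loop()
        res = await loop.run_in_executor(
            None,
            lambda: requests.head(url, timeout=10, allow_redirects=True),
        )
        return res.headers.get("content-type", "")


    # Hypothetical usage; the URL is only an example.
    print(asyncio.run(fetch_content_type("https://example.com")))

On recent Python versions, asyncio.get_running_loop() is the more idiomatic call from inside a coroutine; get_event_loop(), as used in the commit, returns the same running loop in this situation.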
GoogleAIModule.py (35 changes: 26 additions & 9 deletions)
@@ -1,5 +1,6 @@
 from datetime import datetime
 import re
+import asyncio
 import time
 import uuid
 import json
@@ -443,33 +444,49 @@ def __init__(
         self.msg_args = msg_args or []
 
 
-def _get_webpage_by_url(args):
+async def _get_webpage_by_url(args):
     try:
         url = args["url"]
         if not (schema := re.search(r"(.*)://", url)):
             url = "https://" + url
         elif (schema := schema.group(1)) not in ["https", "http"]:
             return {"error": f"Invalid url schema {schema}"}
 
-        header = requests.head(url, timeout=20, allow_redirects=True)
+        loop = asyncio.get_event_loop()
+        header = await loop.run_in_executor(
+            None, lambda: requests.head(url, timeout=20, allow_redirects=True)
+        )
         content_type = header.headers.get("content-type")
         if not content_type.startswith("text/html"):
             return {"error": f"Unsupported content type {content_type}"}
 
-        res = requests.get(url, timeout=20, allow_redirects=True)
+        res = await loop.run_in_executor(
+            None, lambda: requests.get(url, timeout=20, allow_redirects=True)
+        )
         document = Document(res.content)
         return {"webpage": markdownify(document.summary())}
     except Exception:
         return {"error": "Can not read the url"}
 
 
-def _search_on_google(args):
+async def _complete_google_result(res):
+    return {
+        "url": res.url,
+        "title": res.title,
+        "description": res.description,
+        "content": await _get_webpage_by_url({"url": res.url}),
+    }
+
+
+async def _search_on_google(args):
     try:
         return {
-            "results": [
-                {"url": res.url, "title": res.title, "description": res.description}
-                for res in googlesearch(args["keyword"], advanced=True)
-            ]
+            "results": await asyncio.gather(
+                *[
+                    _complete_google_result(res)
+                    for res in googlesearch(args["keyword"], advanced=True, num_results=3)
+                ]
+            )
         }
 
     except Exception:
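Taken together, the new helpers implement the feature in the commit title: each Google result is enriched with the page content, and asyncio.gather fetches the three results concurrently instead of one after another. The page body is isolated with readability's Document and converted to Markdown with markdownify. A rough standalone sketch of the same flow follows; the import aliases, helper names, and example query are assumptions, since the module's import section is not shown in the diff:

    import asyncio

    import requests
    from googlesearch import search as googlesearch  # googlesearch-python; assumed alias
    from markdownify import markdownify
    from readability import Document  # readability-lxml


    async def fetch_page_markdown(url: str) -> dict:
        # Blocking HTTP call moved to a worker thread, mirroring the commit.
        loop = asyncio.get_event_loop()
        res = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=20, allow_redirects=True)
        )
        # Document() isolates the main article body; markdownify() turns that
        # HTML fragment into Markdown for the model.
        return {"url": url, "content": markdownify(Document(res.content).summary())}


    async def search_and_fetch(keyword: str) -> list:
        # The search itself still runs synchronously here, as it does in the commit.
        results = googlesearch(keyword, advanced=True, num_results=3)
        # gather() runs the page fetches concurrently.
        return await asyncio.gather(*[fetch_page_markdown(r.url) for r in results])


    # Hypothetical usage; the query string is only an example.
    print(asyncio.run(search_and_fetch("python asyncio run_in_executor")))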
@@ -529,7 +546,7 @@ def _invoke_tool(function_call: FunctionCall):
     tool = next((t for t in TOOLS if t.name == function_call.name), None)
     if not tool:
         return {"error": "Function not found"}
-    return tool.handler(function_call.args)
+    return asyncio.run(tool.handler(function_call.args))
 
 
 def _get_tool_msg(function_call: FunctionCall):
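Because _invoke_tool remains a regular synchronous function, the commit bridges into the now-async tool handlers with asyncio.run, which creates a fresh event loop, runs the coroutine to completion, and closes the loop again. A small sketch of that bridge; the handler and argument names are illustrative:

    import asyncio


    async def get_webpage(args: dict) -> dict:
        # Stand-in for an async tool handler such as _get_webpage_by_url.
        return {"webpage": f"fetched {args['url']}"}


    def invoke_tool(handler, args: dict) -> dict:
        # asyncio.run only works when no event loop is already running in the
        # current thread; it owns the loop for the duration of the call.
        return asyncio.run(handler(args))


    print(invoke_tool(get_webpage, {"url": "https://example.com"}))

If _invoke_tool were ever called from inside an already-running event loop, asyncio.run would raise a RuntimeError; in that case the handler would need to be awaited or scheduled on the existing loop instead.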
