
Failed to upload: undefined PDF QA with ERROR: Exception in ASGI application #949

Open
miriam-z opened this issue Apr 28, 2024 · 0 comments
Labels: cookbook (issues related to the chainlit-cookbook repository)
miriam-z commented Apr 28, 2024

Describe the bug

When I try to upload a PDF file in the deployed environment, the upload fails with "Failed to upload: undefined".

To Reproduce
Steps to reproduce the behavior:

1. Build the Docker image and deploy it to the cloud (build and deployment succeed).
2. Open the app and upload a PDF file; the upload fails with the error above.

app.py:

from typing import List
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_anthropic import ChatAnthropic
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.docstore.document import Document


import chainlit as cl
from chainlit.types import AskFileResponse


# Split documents into 1000-character chunks with 100 characters of overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

welcome_message = """Welcome to the Chainlit PDF QA demo! To get started:
1. Upload a PDF file
2. Ask a question about the file
"""


def process_file(file: AskFileResponse):
    # Load the uploaded PDF and split it into chunks, tagging each chunk
    # with a source id so answers can cite it later
    loader = PyPDFLoader(file.path)
    documents = loader.load()
    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs


def get_docsearch(file: AskFileResponse):
    docs = process_file(file)

    # save data in the user session
    cl.user_session.set("docs", docs)

    docsearch = Chroma.from_documents(documents=docs, embedding=embeddings)

    return docsearch


@cl.on_chat_start
async def start():
    await cl.Avatar(
        name="Chatbot",
        url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4",
    ).send()
    files = None
    while files is None:
        files = await cl.AskFileMessage(
            content=welcome_message,
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_feedback=True)
    await msg.send()

    docsearch = await cl.make_async(get_docsearch)(file)

    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        ChatAnthropic(
            model="claude-3-opus-20240229",
            temperature=0,
            streaming=True,
            system="""You are an expert research assistant. Here is a document you will answer questions about:
First, find the quotes from the document that are most relevant to answering the question, and then print them in numbered order. Quotes should be relatively short.

If there are no relevant quotes, write "No relevant quotes" instead.

Then, answer the question, starting with "Answer:". Do not include or reference quoted content verbatim in the answer. Don't say "According to Quote [1]" when answering. Instead make references to quotes relevant to each section of the answer solely by adding their bracketed numbers at the end of relevant sentences.

Thus, the format of your overall response should look like what's shown between the <example></example> tags. Make sure to follow the formatting and spacing exactly.
<example>
Quotes:
[1] "Company X reported revenue of $12 million in 2021."
[2] "Almost 90% of revenue came from widget sales, with gadget sales making up the remaining 10%."

Answer:
Company X earned $12 million. [1] Almost 90% of it was from widget sales. [2]
</example>

If the question cannot be answered by the document, say so.""",
        ),
        chain_type="stuff",
        retriever=docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 6}),
        memory=memory,
        return_source_documents=True,
    )

    # let the user know that the system is ready
    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]

        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
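
Since a client-side "Failed to upload: undefined" usually corresponds to the server-side exception in the runtime logs below, a minimal startup check like the following can confirm whether the container user can actually write where uploads land. This is only a debugging sketch; the .files path is an assumption based on the directory the Dockerfile below creates, not a confirmed Chainlit internal:

import os
import tempfile

def check_upload_dir_writable() -> None:
    # Assumed upload location: the .files directory created in the Dockerfile
    upload_dir = os.path.join(os.getcwd(), ".files")
    os.makedirs(upload_dir, exist_ok=True)
    try:
        # Attempt a real write; permission bits alone can be misleading in containers
        with tempfile.NamedTemporaryFile(dir=upload_dir) as probe:
            probe.write(b"probe")
        print(f"upload dir OK: {upload_dir}")
    except OSError as exc:
        print(f"upload dir NOT writable: {upload_dir}: {exc}")

check_upload_dir_writable()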

Dockerfile:

FROM python:3.12

RUN useradd -m -u 15000 user

USER 15000

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

# Create a writable directory for the application
RUN mkdir -p $HOME/app/.files

RUN chown -R user:user $HOME/app/.files

RUN chmod 777 $HOME/app/.files/


# COPY does not expand "~"; use $HOME explicitly and keep user ownership
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt

RUN pip install --no-cache-dir -r requirements.txt

# Redundant re-copy (everything was already copied above); without --chown it
# would leave the files owned by root, so preserve ownership if kept
COPY --chown=user . .

EXPOSE 7860

CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]

requirements.txt:

langchain
langchain-google-genai
langchain-anthropic
langchain-community
chainlit
chromadb
pypdf
tiktoken

.env

ANTHROPIC_API_KEY=sk-ant-XXXX-XXXX-XXXX
OPENAI_API_KEY=sk-XXXX
GOOGLE_API_KEY=XXXX
LITERAL_API_KEY=XXXX
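
To rule out missing credentials in the deployed container, a small sanity check using the key names from the .env above (a sketch; trim the list to the keys the app actually uses):

import os

# Warn about any key from .env that did not make it into the container environment
for key in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", "LITERAL_API_KEY"):
    if not os.environ.get(key):
        print(f"warning: {key} is not set")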

Expected behavior
The PDF is uploaded successfully.

Screenshots
Screenshot 2024-04-29 at 03 11 51

Runtime Logs:

2024-04-29T02:56:39.860+08:00 Production Application Logs main

ERROR: Exception in ASGI application
Traceback (most recent call last):
  File "/home/user/.local/lib/python3.12/site-packages/uvicorn/protocols/http/httptools_impl.py", line 426, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/.local/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 84, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/.local/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/home/user/.local/lib/python3.12/site-packages/starlette/applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/home/user/.local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/home/user/.local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 164, in __call__

Desktop:

  • OS: macOS 12.7.4
  • Browser: Chrome 123.0.6312.122
  • Chainlit: 1.0.505
  • Python: 3.12.1


miriam-z changed the title from "Failed to upload: undefined PDF QA" to "Failed to upload: undefined PDF QA with ERROR: Exception in ASGI application" on Apr 28, 2024
tpatel added the cookbook label and removed the needs-triage label on Apr 29, 2024