Isolate JSON prompts so can change language etc.
pseudotensor committed Apr 23, 2024
1 parent d8b064d commit 88b62f1
Showing 12 changed files with 353 additions and 125 deletions.
144 changes: 81 additions & 63 deletions gradio_utils/grclient.py
@@ -664,6 +664,7 @@ def query_or_summarize_or_extract(
file: list[str] | str | None = None,
url: list[str] | str | None = None,
embed: bool = True,

chunk: bool = True,
chunk_size: int = 512,
langchain_mode: str = None,
@@ -677,13 +678,21 @@ def query_or_summarize_or_extract(
document_content_substrings: Union[str, List[str]] = [],
document_content_substrings_op: str = "and",
system_prompt: str | None = "",

pre_prompt_query: str | None = pre_prompt_query0,
prompt_query: str | None = prompt_query0,
pre_prompt_summary: str | None = pre_prompt_summary0,
prompt_summary: str | None = prompt_summary0,
pre_prompt_extraction: str | None = pre_prompt_extraction0,
prompt_extraction: str | None = prompt_extraction0,
hyde_llm_prompt: str | None = hyde_llm_prompt0,

user_prompt_for_fake_system_prompt: str = None,
json_object_prompt: str = None,
json_object_prompt_simpler: str = None,
json_code_prompt: str = None,
json_schema_instruction: str = None,

model: str | int | None = None,
stream_output: bool = False,
do_sample: bool = False,
@@ -731,15 +740,18 @@ def query_or_summarize_or_extract(
tts_speed: float = 1.0,
visible_image_models: List[str] = [],
visible_models: Union[str, int, list] = None,
num_return_sequences: int = None, # don't use
chat: bool = True, # don't use
min_new_tokens: int = None, # don't use
early_stopping: Union[bool, str] = None, # don't use
iinput: str = "", # don't use
iinput_nochat: str = "", # don't use
instruction_nochat: str = "", # don't use
context: str = "", # don't use
num_beams: int = 1, # don't use

# don't use the below (no doc string stuff) block
num_return_sequences: int = None,
chat: bool = True,
min_new_tokens: int = None,
early_stopping: Union[bool, str] = None,
iinput: str = "",
iinput_nochat: str = "",
instruction_nochat: str = "",
context: str = "",
num_beams: int = 1,

asserts: bool = False,
) -> Generator[ReturnType, None, None]:
"""
@@ -764,31 +776,31 @@ def query_or_summarize_or_extract(
url: a url to give or urls to use
embed: whether to embed content uploaded
langchain_mode: "LLM" to talk to LLM with no docs, "MyData" for personal docs, "UserData" for shared docs, etc.
langchain_action: Action to take, "Query" or "Summarize" or "Extract"
langchain_agents: Which agents to use, if any
top_k_docs: number of document parts.
:param langchain_mode: "LLM" to talk to LLM with no docs, "MyData" for personal docs, "UserData" for shared docs, etc.
:param langchain_action: Action to take, "Query" or "Summarize" or "Extract"
:param langchain_agents: Which agents to use, if any
:param top_k_docs: number of document parts.
When doing query, number of chunks
When doing summarization, not related to vectorDB chunks (which are not used)
E.g. if PDF, then number of pages
chunk: whether to chunk sources for document Q/A
chunk_size: Size in characters of chunks
document_choice: Which documents ("All" means all) -- need to use upload_api API call to get server's name if want to select
document_subset: Type of query, see src/gen.py
document_source_substrings: See gen.py
document_source_substrings_op: See gen.py
document_content_substrings: See gen.py
document_content_substrings_op: See gen.py
system_prompt: pass system prompt to models that support it.
:param chunk: whether to chunk sources for document Q/A
:param chunk_size: Size in characters of chunks
:param document_choice: Which documents ("All" means all) -- need to use upload_api API call to get server's name if want to select
:param document_subset: Type of query, see src/gen.py
:param document_source_substrings: See gen.py
:param document_source_substrings_op: See gen.py
:param document_content_substrings: See gen.py
:param document_content_substrings_op: See gen.py
:param system_prompt: pass system prompt to models that support it.
If 'auto' or None, then use automatic version
If '', then use no system prompt (default)
pre_prompt_query: Prompt that comes before document part
prompt_query: Prompt that comes after document part
pre_prompt_summary: Prompt that comes before document part
:param pre_prompt_query: Prompt that comes before document part
:param prompt_query: Prompt that comes after document part
:param pre_prompt_summary: Prompt that comes before document part
None makes h2oGPT internally use its defaults
E.g. "In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text"
prompt_summary: Prompt that comes after document part
:param prompt_summary: Prompt that comes after document part
None makes h2oGPT internally use its defaults
E.g. "Using only the text above, write a condensed and concise summary of key results (preferably as bullet points):\n"
i.e. for some internal document part fstring, the template looks like:
@@ -797,53 +809,59 @@
%s
\"\"\"
%s" % (pre_prompt_summary, fstring, prompt_summary)
hyde_llm_prompt: hyde prompt for first step when using LLM
h2ogpt_key: Access Key to h2oGPT server (if not already set in client at init time)
model: base_model name or integer index of model_lock on h2oGPT server
:param hyde_llm_prompt: hyde prompt for first step when using LLM
:param user_prompt_for_fake_system_prompt: user part of pre-conversation if LLM doesn't handle system prompt
:param json_object_prompt: prompt for getting the LLM to output a JSON object
:param json_object_prompt_simpler: simpler variant of json_object_prompt, e.g. for MistralAI
:param json_code_prompt: prompt for getting the LLM to output JSON inside a code block
:param json_schema_instruction: prompt instructing the LLM to follow the given JSON schema
:param h2ogpt_key: Access Key to h2oGPT server (if not already set in client at init time)
:param model: base_model name or integer index of model_lock on h2oGPT server
None results in use of first (0th index) model in server
to get list of models do client.list_models()
pre_prompt_extraction: Same as pre_prompt_summary but for when doing extraction
prompt_extraction: Same as prompt_summary but for when doing extraction
do_sample: see src/gen.py
seed: see src/gen.py
temperature: see src/gen.py
top_p: see src/gen.py
top_k: see src/gen.py
repetition_penalty: see src/gen.py
penalty_alpha: see src/gen.py
max_new_tokens: see src/gen.py
min_max_new_tokens: see src/gen.py
max_input_tokens: see src/gen.py
max_total_input_tokens: see src/gen.py
stream_output: Whether to stream output
max_time: how long to take
add_search_to_context: Whether to do web search and add results to context
chat_conversation: List of tuples for (human, bot) conversation that will be pre-appended to an (instruction, None) case for a query
text_context_list: List of strings to add to context for non-database version of document Q/A for faster handling via API etc.
:param pre_prompt_extraction: Same as pre_prompt_summary but for when doing extraction
:param prompt_extraction: Same as prompt_summary but for when doing extraction
:param do_sample: see src/gen.py
:param seed: see src/gen.py
:param temperature: see src/gen.py
:param top_p: see src/gen.py
:param top_k: see src/gen.py
:param repetition_penalty: see src/gen.py
:param penalty_alpha: see src/gen.py
:param max_new_tokens: see src/gen.py
:param min_max_new_tokens: see src/gen.py
:param max_input_tokens: see src/gen.py
:param max_total_input_tokens: see src/gen.py
:param stream_output: Whether to stream output
:param max_time: how long to take
:param add_search_to_context: Whether to do web search and add results to context
:param chat_conversation: List of tuples for (human, bot) conversation that will be pre-appended to an (instruction, None) case for a query
:param text_context_list: List of strings to add to context for non-database version of document Q/A for faster handling via API etc.
Forces LangChain code path and uses as many entries in list as possible given max_seq_len, with first assumed to be most relevant and to go near prompt.
docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
max_input_tokens: Max input tokens to place into model context for each LLM call
:param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
:param max_input_tokens: Max input tokens to place into model context for each LLM call
-1 means auto, fully fill context for query, and fill by original document chunk for summarization
>=0 means use that to limit context filling to that many tokens
max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
max_new_tokens: Maximum new tokens
min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
:param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
:param max_new_tokens: Maximum new tokens
:param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
:param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
or top_k_docs original document chunks summarization
None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
hyde_level: 0-3 for HYDE.
:param docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
:param hyde_level: 0-3 for HYDE.
0 uses just query to find similarity with docs
1 uses query + pure LLM response to find similarity with docs
2: uses query + LLM response using docs to find similarity with docs
3+: etc.
hyde_template: see src/gen.py
hyde_show_only_final: see src/gen.py
doc_json_mode: see src/gen.py
metadata_in_context: see src/gen.py
:param hyde_template: see src/gen.py
:param hyde_show_only_final: see src/gen.py
:param doc_json_mode: see src/gen.py
:param metadata_in_context: see src/gen.py
:param image_file: Initial image for UI (or actual image for CLI) Vision Q/A. Or list of images for some models
:param image_control: Initial image for UI Image Control
@@ -889,7 +907,7 @@ def query_or_summarize_or_extract(
and the value is not used to access the inference server.
If you need visible_models for an inference server, then use --model_lock and group together.
asserts: whether to do asserts to ensure handling is correct
:param asserts: whether to do asserts to ensure handling is correct
Returns: summary/answer: str or extraction List[str]
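A minimal usage sketch, not part of this commit, of how a client call might override the newly isolated JSON prompts (for example to switch their language). The server URL and the `instruction` keyword are assumptions for illustration; only the JSON-prompt keyword arguments come from the signature above.

```python
# Hedged sketch (not part of this commit): overriding the isolated JSON prompts
# per call, e.g. to supply translated instructions. The server URL and the
# `instruction` keyword are assumptions; only the JSON-prompt keywords are
# taken from the signature shown in the diff above.
from gradio_utils.grclient import GradioClient

client = GradioClient("http://localhost:7860")  # assumed h2oGPT server address
for ret in client.query_or_summarize_or_extract(
    instruction="List the key findings as JSON.",  # assumed query keyword
    text_context_list=["Finding 1: ...", "Finding 2: ..."],
    langchain_action="Query",
    # Newly isolated JSON prompts; pass alternate strings to change language etc.
    json_object_prompt="Ensure your entire response is a single piece of strict valid JSON text.",
    json_object_prompt_simpler="Ensure your response is strictly valid JSON text.",
    json_code_prompt="Ensure your response is strict valid JSON inside a Markdown code block.",
    stream_output=False,
):
    print(ret)  # ReturnType objects yielded by the generator
```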
7 changes: 7 additions & 0 deletions openai_server/server.py
@@ -74,6 +74,13 @@ class H2oGPTParams(BaseModel):
pre_prompt_summary: str | None = None
prompt_summary: str | None = None
hyde_llm_prompt: str | None = None

user_prompt_for_fake_system_prompt: str | None = None
json_object_prompt: str | None = None
json_object_prompt_simpler: str | None = None
json_code_prompt: str | None = None
json_schema_instruction: str | None = None

system_prompt: str | None = 'auto'

image_audio_loaders: List | None = None
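A hedged sketch of how the new H2oGPTParams fields might be passed to the OpenAI-compatible server, assuming they are accepted as extra request-body keys; the base_url, api_key, and model below are placeholders, not values from this commit.

```python
# Hedged sketch (not from this commit): supplying the new JSON prompt fields as
# extra body parameters to the OpenAI-compatible server. Assumes H2oGPTParams
# fields are read from the request body; base_url, api_key, and model are
# placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="h2ogpt-model",
    messages=[{"role": "user", "content": "Give the capitals of France and Japan as JSON."}],
    extra_body=dict(
        # e.g. translated JSON instructions, which this change makes overridable
        json_object_prompt="Ensure your entire response is a single piece of strict valid JSON text.",
        json_code_prompt="Ensure your response is strict valid JSON inside a Markdown code block.",
    ),
)
print(resp.choices[0].message.content)
```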
7 changes: 7 additions & 0 deletions src/cli.py
@@ -43,6 +43,13 @@ def run_cli( # for local function:
top_k_docs=None, chunk=None, chunk_size=None,
pre_prompt_query=None, prompt_query=None,
pre_prompt_summary=None, prompt_summary=None, hyde_llm_prompt=None,

user_prompt_for_fake_system_prompt=None,
json_object_prompt=None,
json_object_prompt_simpler=None,
json_code_prompt=None,
json_schema_instruction=None,

image_audio_loaders=None,
pdf_loaders=None,
url_loaders=None,
7 changes: 7 additions & 0 deletions src/client_test.py
@@ -141,6 +141,13 @@ def get_args(prompt, prompt_type=None, chat=False, stream_output=False,
pre_prompt_summary=None,
prompt_summary=None,
hyde_llm_prompt=None,

user_prompt_for_fake_system_prompt=None,
json_object_prompt=None,
json_object_prompt_simpler=None,
json_code_prompt=None,
json_schema_instruction=None,

system_prompt=system_prompt,
image_audio_loaders=None,
pdf_loaders=None,
6 changes: 5 additions & 1 deletion src/enums.py
@@ -566,7 +566,11 @@ def gr_to_lg(image_audio_loaders,
max_chunks_per_doc_public = 5000
max_chunks_per_doc_public_api = 2 * max_chunks_per_doc_public

user_prompt_for_fake_system_prompt = "Who are you and what do you do?"
user_prompt_for_fake_system_prompt0 = "Who are you and what do you do?"
json_object_prompt0 = 'Ensure your entire response is outputted as a single piece of strict valid JSON text.'
json_object_prompt_simpler0 = 'Ensure your response is strictly valid JSON text.'
json_code_prompt0 = 'Ensure your entire response is outputted as strict valid JSON text inside a Markdown code block with the json language identifier.'
json_schema_instruction0 = 'Ensure you follow this JSON schema:\n```json\n{properties_schema}\n```'

coqui_lock_name = 'coqui'

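Since the new json_schema_instruction0 default leaves a {properties_schema} placeholder, here is a small sketch of filling it in; the import path and the example schema are assumptions for illustration.

```python
# Hedged sketch: filling the {properties_schema} slot in json_schema_instruction0.
# The import path and the example schema are assumptions for illustration only.
import json

from enums import json_schema_instruction0  # assumed import path (src/enums.py)

properties_schema = json.dumps(
    {"name": {"type": "string"}, "age": {"type": "integer"}},
    indent=2,
)
instruction = json_schema_instruction0.format(properties_schema=properties_schema)
print(instruction)  # instruction now embeds the schema in a json code fence
```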
7 changes: 7 additions & 0 deletions src/eval.py
@@ -62,6 +62,13 @@ def run_eval( # for local function:
document_content_substrings_op=None,
pre_prompt_query=None, prompt_query=None,
pre_prompt_summary=None, prompt_summary=None, hyde_llm_prompt=None,

user_prompt_for_fake_system_prompt=None,
json_object_prompt=None,
json_object_prompt_simpler=None,
json_code_prompt=None,
json_schema_instruction=None,

image_audio_loaders=None,
pdf_loaders=None,
url_loaders=None,
9 changes: 9 additions & 0 deletions src/evaluate_params.py
@@ -50,17 +50,26 @@
"top_k_docs",
"chunk",
"chunk_size",

"document_subset",
"document_choice",
"document_source_substrings",
"document_source_substrings_op",
"document_content_substrings",
"document_content_substrings_op",

"pre_prompt_query",
"prompt_query",
"pre_prompt_summary",
"prompt_summary",
"hyde_llm_prompt",

"user_prompt_for_fake_system_prompt",
"json_object_prompt",
"json_object_prompt_simpler",
"json_code_prompt",
"json_schema_instruction",

"system_prompt",
]
+ reader_names
