Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Progressive output and cancel button for 'Inference' tab #43

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 29 additions & 17 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ def load_lora(lora_name, progress=gr.Progress(track_tqdm=True)):
)

self.generate_btn = gr.Button('Generate', variant='primary')
self.cancel_btn = gr.Button('Cancel', variant='primary')

with gr.Row():
with gr.Column():
Expand Down Expand Up @@ -249,28 +250,37 @@ def load_lora(lora_name, progress=gr.Progress(track_tqdm=True)):


def generate(
prompt,
do_sample,
max_new_tokens,
num_beams,
repeat_penalty,
temperature,
prompt,
do_sample,
max_new_tokens,
num_beams,
repeat_penalty,
temperature,
top_p,
top_k,
progress=gr.Progress(track_tqdm=True)
):
return self.trainer.generate(
prompt,
do_sample=do_sample,
max_new_tokens=max_new_tokens,
num_beams=num_beams,
repetition_penalty=repeat_penalty,
temperature=temperature,
top_p=top_p,
top_k=top_k
)
# Iteratively generate one token per call until we either emit max_new_tokens or stop getting new output
for i in range(max_new_tokens):
output_this_iteration = self.trainer.generate(
prompt,
do_sample=do_sample,
max_new_tokens=1,
num_beams=num_beams,
repetition_penalty=repeat_penalty,
temperature=temperature,
top_p=top_p,
top_k=top_k
)
# If the output is the same length as the prompt we passed in (i.e. the previous iteration's output), nothing new was generated, so generation is done
if len(prompt) == len(output_this_iteration):
break

prompt = output_this_iteration
yield output_this_iteration


self.generate_btn.click(
generate_event = self.generate_btn.click(
fn=generate,
inputs=[
self.prompt,
Expand All @@ -285,6 +295,8 @@ def generate(
outputs=[self.output]
)

self.cancel_btn.click(fn=None, inputs=None, outputs=None, cancels=[generate_event])

def layout(self):
with gr.Blocks() as demo:
with gr.Row():
Expand Down
3 changes: 2 additions & 1 deletion trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def load_model(self, model_name, force=False, **kwargs):
load_in_8bit=True,
torch_dtype=torch.float16,
)
# Clear the collection that tracks which adapters are loaded, since those adapters are associated with self.model
self.loras = {}

if model_name.startswith('decapoda-research/llama'):
self.tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
Expand Down Expand Up @@ -74,7 +76,6 @@ def load_lora(self, lora_name, replace_model=True):

if peft_config.base_model_name_or_path != self.model_name:
self.load_model(peft_config.base_model_name_or_path)
self.loras = {}

assert self.model_name is not None
assert self.model is not None
Expand Down