Merge pull request #48 from GoGoJoestar/main
fix bug in gradio_demo.py when launching vLLM server
ymcui committed Aug 2, 2023
2 parents cce5350 + 4e733da commit 2e0aca6
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions scripts/inference/gradio_demo.py
@@ -80,7 +80,7 @@
 parser.add_argument(
     "--post_host",
     type=str,
-    default="localhost",
+    default="0.0.0.0",
     help="Host of vLLM service.")
 parser.add_argument(
     "--post_port",
@@ -128,15 +128,15 @@ def setup():
         tokenizer = LlamaTokenizer.from_pretrained(args.tokenizer_path, legacy=True)
 
         print("Start launch vllm server.")
-        cmd = [
-            "python -m vllm.entrypoints.api_server",
-            f"--model={args.base_model}",
-            f"--tokenizer={args.tokenizer_path}",
-            "--tokenizer-mode=slow",
-            f"--tensor-parallel-size={len(args.gpus.split(','))}",
-            "&",
-        ]
-        subprocess.check_call(cmd)
+        cmd = f"python -m vllm.entrypoints.api_server \
+            --model={args.base_model} \
+            --tokenizer={args.tokenizer_path} \
+            --tokenizer-mode=slow \
+            --tensor-parallel-size={len(args.gpus.split(','))} \
+            --host {args.post_host} \
+            --port {args.post_port} \
+            &"
+        subprocess.check_call(cmd, shell=True)
     else:
         max_memory = args.max_memory
         port = args.port
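The underlying bug: with the old list form, subprocess.check_call treated the whole string "python -m vllm.entrypoints.api_server" as a single executable name, and the trailing "&" is shell syntax that does nothing without a shell, so the launch failed when --use_vllm was selected. The fix builds one command string, threads --host/--port through from --post_host/--post_port, and runs it with shell=True so "&" backgrounds the server. A hedged alternative sketch (not from this commit, helper name assumed) that reaches the same non-blocking launch without shell=True:

import subprocess

def launch_vllm_server(base_model, tokenizer_path, gpus, host, port):
    # Hypothetical helper mirroring the arguments used in the diff above.
    cmd = [
        "python", "-m", "vllm.entrypoints.api_server",
        f"--model={base_model}",
        f"--tokenizer={tokenizer_path}",
        "--tokenizer-mode=slow",
        f"--tensor-parallel-size={len(gpus.split(','))}",
        "--host", str(host),
        "--port", str(port),
    ]
    # Popen returns immediately, so the Gradio demo can keep starting up while
    # the vLLM server initializes in the background (no shell "&" needed).
    return subprocess.Popen(cmd)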
