Add Docker start #555

Open · wants to merge 1 commit into base: main
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
.env
/THUDM
7 changes: 7 additions & 0 deletions .env
@@ -0,0 +1,7 @@
# Start

# Launch the web UI by default
START_MODE=web_demo.py

# Launch the API
# START_MODE=api.py
7 changes: 7 additions & 0 deletions .env.template
@@ -0,0 +1,7 @@
# Start

# Launch the web UI by default
START_MODE=web_demo.py

# Launch the API
# START_MODE=api.py
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.env
/THUDM
14 changes: 14 additions & 0 deletions Dockerfile
@@ -0,0 +1,14 @@
FROM nvcr.io/nvidia/pytorch:23.06-py3

WORKDIR /app

# RUN git clone --depth=1 https://github.com/THUDM/ChatGLM2-6B.git /app

COPY . .

RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip install -r requirements.txt

EXPOSE 7860 8000

CMD python ${CLI_ARGS}
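
For orientation: `START_MODE` from `.env` is passed in by docker-compose as `CLI_ARGS`, and the shell-form `CMD` above expands it when the container starts. A rough, untested `docker run` equivalent of the int4 compose profile (image tag and flag choices are illustrative, not part of this PR) might be:

```shell
# Approximate manual equivalent of `docker-compose --profile int4 up` (illustrative only)
docker build -t chatglm2-6b .
docker run --gpus device=0 -p 7860:7860 -p 8000:8000 \
  -v "$(pwd)/THUDM/chatglm2-6b-int4:/app/THUDM/chatglm2-6b" \
  -e CLI_ARGS=web_demo.py \
  chatglm2-6b
```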
15 changes: 15 additions & 0 deletions README.md
@@ -279,7 +279,22 @@ if __name__ == "__main__":
if hasattr(chunk.choices[0].delta, "content"):
print(chunk.choices[0].delta.content, end="", flush=True)
```
### Docker Deployment

Copy the configuration file `.env.template` to `.env`. The default start mode is the `webui`.
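
A minimal way to create the configuration from the repository root, assuming a POSIX shell:

```shell
# Create the runtime configuration from the template;
# edit START_MODE in .env afterwards to switch between web_demo.py and api.py.
cp .env.template .env
```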

As described earlier, place the corresponding model files in the `THUDM/chatglm2-6b` folder (or `THUDM/chatglm2-6b-int4` for the quantized model).
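
One possible way to fetch the weights, assuming `git-lfs` is installed and network access to huggingface.co (shown here for the int4 variant; the full model works the same way with the `THUDM/chatglm2-6b` repository):

```shell
# Illustrative download of the quantized weights from Hugging Face Hub
git lfs install
mkdir -p THUDM
git clone https://huggingface.co/THUDM/chatglm2-6b-int4 THUDM/chatglm2-6b-int4
```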

Build the image, start the program, and load the `int4` model.
> This start mode can be used with less than `8G` of GPU memory.
```shell
docker-compose --profile int4 up -d
```
Build the image, start the program, and load the full model.
> Requires roughly `13G` of GPU memory.
```shell
docker-compose --profile int16 up -d
```
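
If `START_MODE` in `.env` is set to `api.py`, the container serves the API on port 8000 instead of the web UI. A quick smoke test might look like the following; the request shape follows the `api.py` example elsewhere in this README, so treat the exact fields as an assumption:

```shell
# Hypothetical smoke test against the API container on port 8000
curl -X POST "http://127.0.0.1:8000" \
     -H 'Content-Type: application/json' \
     -d '{"prompt": "Hello", "history": []}'
```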

## Low-Cost Deployment

43 changes: 43 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,43 @@
version: "3.3"
services:
chatglm2-6b:
profiles: ["int16"]
build:
context: .
ports:
- 7860:7860
- 8000:8000
stdin_open: true
tty: true
volumes:
- ./THUDM/chatglm2-6b:/app/THUDM/chatglm2-6b
environment:
- CLI_ARGS=$START_MODE
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]

chatglm2-6b-int4:
profiles: ["int4"]
build:
context: .
ports:
- 7860:7860
- 8000:8000
stdin_open: true
tty: true
volumes:
- ./THUDM/chatglm2-6b-int4:/app/THUDM/chatglm2-6b
environment:
- CLI_ARGS=$START_MODE
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
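
Because `$START_MODE` is resolved from `.env` each time compose brings the service up, switching between the web UI and the API only requires editing `.env` and recreating the container. One possible workflow (the `sed` edit is just one way to change the value):

```shell
# Illustrative switch from the default web UI to the API server (int4 profile)
sed -i 's/^START_MODE=.*/START_MODE=api.py/' .env
docker-compose --profile int4 up -d --force-recreate
```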
2 changes: 1 addition & 1 deletion web_demo.py
@@ -105,4 +105,4 @@ def reset_state():

emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)

demo.queue().launch(share=False, inbrowser=True)
demo.queue().launch(server_name="0.0.0.0", share=False, inbrowser=True)