Add openpose #16

Open · wants to merge 9 commits into master
app.py (24 changes: 17 additions & 7 deletions)
@@ -1,7 +1,5 @@
 import argparse
 import os
-os.system("pip install ftfy regex tqdm")
-os.system("pip install git+https://github.com/openai/CLIP.git")
 import sys
 import io
 import gradio as gr
@@ -15,6 +13,7 @@
 from util import box_ops
 from util.config import Config
 from util.utils import clean_state_dict
+from util.draw_openpose import draw_openpose
 
 import matplotlib.pyplot as plt
 from matplotlib.collections import PatchCollection
@@ -192,7 +191,7 @@ def plot_on_image(image_pil, tgt, keypoint_skeleton, keypoint_text_prompt):
 
     sks = np.array(keypoint_skeleton)
     # import pdb;pdb.set_trace()
-    if sks !=[]:
+    if sks.shape[0] != 0:
         if sks.min()==1:
             sks = sks - 1

@@ -338,7 +337,14 @@ def run_unipose(input_image, instance_text_prompt, keypoint_text_example, box_threshold, IoU_threshold):
     }
     # import ipdb; ipdb.set_trace()
     image_with_predict = plot_on_image(image_pil, pred_dict, keypoint_skeleton, keypoint_text_prompt)
-    return image_with_predict
+    # image_openpose stays None for unsupported prompts or when drawing fails
+    image_openpose = None
+    if instance_text_prompt in ['person', 'face']:
+        try:
+            image_openpose = draw_openpose(keypoints_filt, instance_text_prompt, image)
+        except ValueError as e:
+            print("An exception occurred:", e)
+    return image_with_predict, image_openpose



@@ -354,7 +360,7 @@ def run_unipose(input_image, instance_text_prompt, keypoint_text_example, box_threshold, IoU_threshold):
 model = load_model(config_file, checkpoint_path, cpu_only=False)
 
 if __name__ == "__main__":
-    MARKDOWN = \
+    MARKDOWN = \
     """
 ## UniPose: Detecting Any Keypoints

@@ -383,10 +389,14 @@ def run_unipose(input_image, instance_text_prompt, keypoint_text_example, box_threshold, IoU_threshold):
                 type="pil",
 
             ).style(full_width=True, full_height=True)
+        with gr.Column():
+            gallery_openpose = gr.outputs.Image(
+                type="pil",
+            ).style(full_width=True, full_height=True)
 
     run_button.click(fn=run_unipose, inputs=[
-        input_image, instance_prompt, keypoint_example,box_threshold,IoU_threshold], outputs=[gallery])
+        input_image, instance_prompt, keypoint_example,box_threshold,IoU_threshold], outputs=[gallery, gallery_openpose])
 
 
-    block.launch(share=True)
+    block.launch(share=True, server_name="0.0.0.0")
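
For context, a minimal sketch (not part of this PR; the widget names are hypothetical, and it uses the current gr.Image components rather than the deprecated gr.outputs.Image) of the two-output wiring pattern the diff adopts, where the second output simply stays None for unsupported prompts:

import gradio as gr

def predict(image):
    annotated = image  # stand-in for the plot_on_image(...) result
    openpose = None    # Gradio renders a None image output as empty
    return annotated, openpose

with gr.Blocks() as demo:
    inp = gr.Image(type="pil")
    run = gr.Button("Run")
    out_main = gr.Image(type="pil")
    out_pose = gr.Image(type="pil")
    run.click(fn=predict, inputs=[inp], outputs=[out_main, out_pose])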

requirements.txt (3 changes: 3 additions & 0 deletions)
@@ -14,3 +14,6 @@ torch==1.12.1
 torchvision==0.13.1
 transformers==4.22.0
 yapf==0.32.0
+ftfy==6.1.1
+tqdm==4.64.1
+git+https://github.com/openai/CLIP.git
util/draw_openpose.py (129 changes: 129 additions & 0 deletions)
@@ -0,0 +1,129 @@
import math
import numpy as np
from PIL import Image
import matplotlib
import cv2


# keypoints at (or very near) the origin are treated as undetected and skipped
eps = 0.01


def draw_bodypose(canvas, candidate, subset):
    H, W, C = canvas.shape
    candidate = np.array(candidate)
    subset = np.array(subset)

    stickwidth = 4

    # OpenPose limb pairs, given in OpenPose's 1-based joint indexing
    # (shifted to 0-based below when indexing into `subset`)
    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
               [1, 16], [16, 18], [3, 17], [6, 18]]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

    # draw each limb as a filled ellipse spanning its two joints
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0] * float(W)
            X = candidate[index.astype(int), 1] * float(H)
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, colors[i])

    # dim the limbs so the joint circles drawn next stand out
    canvas = (canvas * 0.6).astype(np.uint8)

    for i in range(18):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            x = int(x * W)
            y = int(y * H)
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)

    return canvas


def draw_facepose(canvas, all_lmks):
    H, W, C = canvas.shape
    for lmks in all_lmks:
        lmks = np.array(lmks)
        for lmk in lmks:
            x, y = lmk
            x = int(x * W)
            y = int(y * H)
            if x > eps and y > eps:
                cv2.circle(canvas, (x, y), 3, (255, 255, 255), thickness=-1)
    return canvas


def draw_handpose(canvas, all_hand_peaks):
    H, W, C = canvas.shape

    # bone connectivity for the 21-keypoint OpenPose hand model
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for peaks in all_hand_peaks:
        peaks = np.array(peaks)

        for ie, e in enumerate(edges):
            x1, y1 = peaks[e[0]]
            x2, y2 = peaks[e[1]]
            x1 = int(x1 * W)
            y1 = int(y1 * H)
            x2 = int(x2 * W)
            y2 = int(y2 * H)
            if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
                cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            x = int(x * W)
            y = int(y * H)
            if x > eps and y > eps:
                cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas


def unipose_kps2openpose_kps(unipose_kps):
    # supports the 'person' (COCO-17) case only
    kps = unipose_kps.view(1, 17, 2).numpy()
    # OpenPose's 18-point body format adds a neck joint, synthesized here as
    # the midpoint of the two shoulders (COCO indices 5 and 6)
    neck = np.mean(kps[:, [5, 6]], axis=1)
    new_kps = np.insert(kps, 17, neck, axis=1)

    # permute from the COCO/mmpose joint order into the OpenPose joint order
    mmpose_idx = [
        17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
    ]
    openpose_idx = [
        1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
    ]
    new_kps[:, openpose_idx] = new_kps[:, mmpose_idx]
    return new_kps


def draw_openpose(filtered_keypoints, instance_text_prompt, image):
    _, H, W = image.shape  # image is a CHW tensor
    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    for idx, kps in enumerate(filtered_keypoints):
        if instance_text_prompt == 'person':
            new_kps = unipose_kps2openpose_kps(kps)
            canvas = draw_bodypose(canvas, new_kps[0], [[i for i in range(18)]])
        else:
            new_kps = kps.view(1, -1, 2).numpy()
            canvas = draw_facepose(canvas, new_kps)
    openpose_pil = Image.fromarray(canvas)
    return openpose_pil


if __name__ == "__main__":
    pass
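
For reference, a minimal sketch (not part of the diff; the dummy tensors and the output path are assumptions) of how draw_openpose can be exercised standalone on a single 'person' instance:

import torch
from util.draw_openpose import draw_openpose

# one instance of 17 COCO keypoints: flattened (x, y) pairs normalized to [0, 1]
dummy_keypoints = [torch.rand(34)]
dummy_image = torch.zeros(3, 480, 640)  # CHW tensor, as draw_openpose expects

skeleton = draw_openpose(dummy_keypoints, 'person', dummy_image)  # returns a PIL.Image
skeleton.save("openpose_skeleton.png")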
util/videos.py (128 changes: 128 additions & 0 deletions)
@@ -0,0 +1,128 @@
import importlib.util
import os
import os.path as osp
import shutil
import sys
from pathlib import Path

import av
import numpy as np
import torch
import torchvision
from einops import rearrange
from PIL import Image


def seed_everything(seed):
    import random

    import numpy as np

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed % (2**32))
    random.seed(seed)


def import_filename(filename):
    spec = importlib.util.spec_from_file_location("mymodule", filename)
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module


def delete_additional_ckpt(base_path, num_keep):
    dirs = []
    for d in os.listdir(base_path):
        if d.startswith("checkpoint-"):
            dirs.append(d)
    num_tot = len(dirs)
    if num_tot <= num_keep:
        return
    # sort checkpoints by step number and delete the earliest ones
    del_dirs = sorted(dirs, key=lambda x: int(x.split("-")[-1]))[: num_tot - num_keep]
    for d in del_dirs:
        path_to_dir = osp.join(base_path, d)
        if osp.exists(path_to_dir):
            shutil.rmtree(path_to_dir)


def save_videos_from_pil(pil_images, path, fps=8, mp4format='libx264'):
    save_fmt = Path(path).suffix
    os.makedirs(os.path.dirname(path), exist_ok=True)
    width, height = pil_images[0].size

    if save_fmt == ".mp4":
        codec = mp4format
        container = av.open(path, "w")
        stream = container.add_stream(codec, rate=fps)

        stream.width = width
        stream.height = height

        for pil_image in pil_images:
            av_frame = av.VideoFrame.from_image(pil_image)
            container.mux(stream.encode(av_frame))
        container.mux(stream.encode())  # flush any buffered frames
        container.close()

    elif save_fmt == ".gif":
        pil_images[0].save(
            fp=path,
            format="GIF",
            append_images=pil_images[1:],
            save_all=True,
            duration=(1 / fps * 1000),
            loop=0,
        )
    else:
        raise ValueError("Unsupported file type. Use .mp4 or .gif.")


def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=8):
    videos = rearrange(videos, "b c t h w -> t b c h w")
    height, width = videos.shape[-2:]
    outputs = []

    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)  # (c h w)
        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)  # (h w c)
        if rescale:
            x = (x + 1.0) / 2.0  # [-1, 1] -> [0, 1]
        x = (x * 255).numpy().astype(np.uint8)
        x = Image.fromarray(x)

        outputs.append(x)

    os.makedirs(os.path.dirname(path), exist_ok=True)

    save_videos_from_pil(outputs, path, fps)


def read_frames(video_path):
    container = av.open(video_path)

    video_stream = next(s for s in container.streams if s.type == "video")
    frames = []
    for packet in container.demux(video_stream):
        for frame in packet.decode():
            image = Image.frombytes(
                "RGB",
                (frame.width, frame.height),
                frame.to_rgb().to_ndarray(),
            )
            frames.append(image)

    return frames


def get_fps(video_path):
    container = av.open(video_path)
    video_stream = next(s for s in container.streams if s.type == "video")
    fps = video_stream.average_rate
    container.close()
    return fps
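
For reference, a minimal sketch (not part of the diff; input.mp4 and the output path are assumptions) of how these helpers compose, reading a clip and re-encoding it as a GIF:

from util.videos import get_fps, read_frames, save_videos_from_pil

frames = read_frames("input.mp4")  # list of PIL RGB frames
fps = get_fps("input.mp4")         # the stream's average_rate (a fractions.Fraction)
save_videos_from_pil(frames, "out/preview.gif", fps=int(fps))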