From 77d5f549575f304088832783482f4502f5ce5069 Mon Sep 17 00:00:00 2001
From: ZFTurbo
Date: Fri, 31 Aug 2018 17:41:43 +0300
Subject: [PATCH] Initial commit

---
 README.md                                     |  58 +++
 a00_utils_and_constants.py                    | 302 +++++++++++
 a01_ensemble_boxes_functions.py               | 134 +++++
 create_files_for_training_by_levels.py        | 488 ++++++++++++++++++
 ...redictions_from_level_1_predictions_csv.py |  89 ++++
 retinanet_inference_example.py                | 197 +++++++
 retinanet_training_level_1/callbacks/eval.py  |  93 ++++
 .../convert_retinanet_model.py                |  71 +++
 .../find_image_parameters.py                  |  76 +++
 .../open_images_generator.py                  | 374 ++++++++++++++
 .../train_oid_level_1_resnet101.py            | 422 +++++++++++++++
 .../train_oid_level_1_resnet152.py            | 418 +++++++++++++++
 12 files changed, 2722 insertions(+)
 create mode 100644 README.md
 create mode 100644 a00_utils_and_constants.py
 create mode 100644 a01_ensemble_boxes_functions.py
 create mode 100644 create_files_for_training_by_levels.py
 create mode 100644 create_higher_level_predictions_from_level_1_predictions_csv.py
 create mode 100644 retinanet_inference_example.py
 create mode 100644 retinanet_training_level_1/callbacks/eval.py
 create mode 100644 retinanet_training_level_1/convert_retinanet_model.py
 create mode 100644 retinanet_training_level_1/find_image_parameters.py
 create mode 100644 retinanet_training_level_1/open_images_generator.py
 create mode 100644 retinanet_training_level_1/train_oid_level_1_resnet101.py
 create mode 100644 retinanet_training_level_1/train_oid_level_1_resnet152.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..48523d2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,58 @@
+## Keras-RetinaNet for Open Images Challenge 2018
+
+This code was used to take 15th place in the Kaggle Google AI Open Images - Object Detection Track competition:
+https://www.kaggle.com/c/google-ai-open-images-object-detection-track/leaderboard
+
+The repository contains the following:
+* Pre-trained models (with ResNet101 and ResNet152 backbones)
+* Example code to get predictions with these models for any set of images
+* Code to train your own classifier based on Keras-RetinaNet and the OID dataset
+* Code to expand predictions to the full set of 500 classes
+
+## Requirements
+
+Python 3.5, Keras 2.2, [Keras-RetinaNet 0.4.1](https://github.com/fizyr/keras-retinanet)
+
+## Pretrained models
+
+There are two RetinaNet models, based on ResNet101 and ResNet152 backbones, for 443 classes (Level 1 only).
+
+| Backbone | Image Size | Model (training) | Model (inference) | Small validation mAP | Full validation mAP |
+| --- | --- | --- | --- | --- | --- |
+| ResNet101 | 728 - 1024 | | | 0.4896 | 0.377631 |
+| ResNet152 | 600 - 800 | | | 0.5028 | 0.384009 |
+
+* Model (training) - can be used to resume training, or as a pretrained starting point for your own classifier
+* Model (inference) - can be used to get prediction boxes for arbitrary images
+
+## Inference
+
+An example can be found in retinanet_inference_example.py
+
+You need to change files_to_process = glob.glob(DATASET_PATH + 'validation_big/\*.jpg') to your own set of files.
+As output you will get a "predictions_\*.csv" file with boxes.
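+
+A minimal usage sketch (the image folder and cache paths are placeholders; the
+function signatures come from retinanet_inference_example.py):
+
+```python
+import glob
+from a00_utils_and_constants import MODELS_PATH, LEVEL_1_LABELS
+from retinanet_inference_example import get_retinanet_predictions_for_files, create_csv_for_retinanet
+
+files_to_process = glob.glob('/path/to/my_images/*.jpg')  # your own set of files
+get_retinanet_predictions_for_files(files_to_process,
+                                    'cache/',  # must exist; per-image predictions are cached here as *.pkl
+                                    MODELS_PATH + 'retinanet_resnet101_level_1_converted.h5',
+                                    'resnet101')
+create_csv_for_retinanet('cache/', 'predictions_level_1.csv', LEVEL_1_LABELS)
+```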
+
+Having these predictions, you can expand them to all 500 classes using the code from create_higher_level_predictions_from_level_1_predictions_csv.py
+
+## Training
+
+For training you need to download the OID dataset (~500 GB of images): https://storage.googleapis.com/openimages/web/challenge.html
+
+Next, fix the paths in a00_utils_and_constants.py
+
+Then, to train on the OID dataset, run the Python files in the following order:
+
+* create_files_for_training_by_levels.py
+* retinanet_training_level_1/find_image_parameters.py
+
+then
+* retinanet_training_level_1/train_oid_level_1_resnet101.py
+
+or
+* retinanet_training_level_1/train_oid_level_1_resnet152.py
+
+
+## Method description
+
+* https://www.kaggle.com/c/google-ai-open-images-object-detection-track/discussion/64633
+ 
\ No newline at end of file
diff --git a/a00_utils_and_constants.py b/a00_utils_and_constants.py
new file mode 100644
index 0000000..6dbdc4b
--- /dev/null
+++ b/a00_utils_and_constants.py
@@ -0,0 +1,302 @@
+# coding: utf-8
+__author__ = 'ZFTurbo: https://kaggle.com/zfturbo'
+
+import numpy as np
+import gzip
+import pickle
+import os
+import glob
+import time
+import cv2
+import datetime
+import pandas as pd
+from collections import Counter, defaultdict
+import random
+import shutil
+import operator
+# import pyvips
+from PIL import Image
+import platform
+import json
+
+
+if platform.processor() == 'Intel64 Family 6 Model 79 Stepping 1, GenuineIntel':
+    DATASET_PATH = 'E:/Projects_M2/2018_07_Google_Open_Images/input/'
+else:
+    DATASET_PATH = 'D:/Projects/2018_07_Google_Open_Images/input/'
+
+ROOT_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/'
+INPUT_PATH = ROOT_PATH + 'input/'
+OUTPUT_PATH = ROOT_PATH + 'output/'
+MODELS_PATH = ROOT_PATH + 'models/'
+SUBM_PATH = ROOT_PATH + 'subm/'
+
+# https://storage.googleapis.com/openimages/challenge_2018/bbox_labels_500_hierarchy_visualizer/circle.html
+
+LEVEL_1_LABELS = ['Accordion', 'Adhesive tape', 'Airplane', 'Alarm clock', 'Alpaca', 'Ambulance', 'Ant', 'Antelope',
+                  'Apple', 'Artichoke', 'Asparagus', 'Backpack', 'Bagel', 'Balloon', 'Banana', 'Barge', 'Barrel',
+                  'Baseball bat', 'Baseball glove', 'Bat', 'Bathroom cabinet', 'Bathtub', 'Beaker', 'Bee', 'Beehive',
+                  'Beer', 'Bell pepper', 'Belt', 'Bench', 'Bicycle', 'Bicycle helmet', 'Bicycle wheel', 'Bidet',
+                  'Billboard', 'Billiard table', 'Binoculars', 'Blender', 'Blue jay', 'Book', 'Bookcase', 'Boot',
+                  'Bottle', 'Bow and arrow', 'Bowl', 'Box', 'Boy', 'Brassiere', 'Bread', 'Briefcase', 'Broccoli',
+                  'Bronze sculpture', 'Brown bear', 'Bull', 'Burrito', 'Bus', 'Bust', 'Butterfly', 'Cabbage',
+                  'Cabinetry', 'Cake', 'Cake stand', 'Camel', 'Camera', 'Canary', 'Candle', 'Candy', 'Cannon',
+                  'Canoe', 'Carrot', 'Cart', 'Castle', 'Cat', 'Caterpillar', 'Cattle', 'Ceiling fan', 'Cello',
+                  'Centipede', 'Chair', 'Cheetah', 'Chest of drawers', 'Chicken', 'Chopsticks', 'Christmas tree',
+                  'Coat', 'Cocktail', 'Coconut', 'Coffee', 'Coffee cup', 'Coffee table', 'Coffeemaker', 'Coin',
+                  'Common fig', 'Computer keyboard', 'Computer monitor', 'Computer mouse', 'Convenience store',
+                  'Cookie', 'Corded phone', 'Countertop', 'Cowboy hat', 'Crab', 'Cricket ball', 'Crocodile',
+                  'Croissant', 'Crown', 'Crutch', 'Cucumber', 'Cupboard', 'Curtain', 'Cutting board', 'Dagger',
+                  'Deer', 'Desk', 'Dice', 'Digital clock', 'Dinosaur', 'Dog', 'Dog bed', 'Doll', 'Dolphin',
+                  'Door', 'Door handle', 'Doughnut', 'Dragonfly', 'Drawer', 'Dress', 'Drinking straw', 'Drum',
+                  'Duck', 'Dumbbell', 'Eagle', 'Earrings', 'Egg', 'Elephant', 'Envelope', 
'Falcon', 'Fedora', + 'Filing cabinet', 'Fire hydrant', 'Fireplace', 'Flag', 'Flashlight', 'Flowerpot', 'Flute', + 'Food processor', 'Football', 'Football helmet', 'Fork', 'Fountain', 'Fox', 'French fries', + 'Frog', 'Frying pan', 'Gas stove', 'Giraffe', 'Girl', 'Glasses', 'Goat', 'Goggles', 'Goldfish', + 'Golf ball', 'Golf cart', 'Gondola', 'Goose', 'Grape', 'Grapefruit', 'Guacamole', 'Guitar', + 'Hamburger', 'Hamster', 'Handbag', 'Handgun', 'Harbor seal', 'Harp', 'Harpsichord', 'Headphones', + 'Helicopter', 'High heels', 'Honeycomb', 'Horn', 'Horse', 'Hot dog', 'House', 'Houseplant', + 'Human arm', 'Human beard', 'Human ear', 'Human eye', 'Human face', 'Human foot', 'Human hair', + 'Human hand', 'Human head', 'Human leg', 'Human mouth', 'Human nose', 'Ice cream', 'Infant bed', + 'Jacket', 'Jaguar', 'Jeans', 'Jellyfish', 'Jet ski', 'Jug', 'Juice', 'Kangaroo', 'Kettle', + 'Kitchen & dining room table', 'Kitchen knife', 'Kite', 'Knife', 'Ladder', 'Ladybug', 'Lamp', + 'Lantern', 'Laptop', 'Lavender', 'Lemon', 'Leopard', 'Lifejacket', 'Light bulb', 'Light switch', + 'Lighthouse', 'Lily', 'Limousine', 'Lion', 'Lizard', 'Lobster', 'Loveseat', 'Lynx', 'Man', + 'Mango', 'Maple', 'Measuring cup', 'Mechanical fan', 'Microphone', 'Microwave oven', 'Miniskirt', + 'Mirror', 'Missile', 'Mixer', 'Mobile phone', 'Monkey', 'Motorcycle', 'Mouse', 'Muffin', 'Mug', + 'Mule', 'Mushroom', 'Musical keyboard', 'Nail', 'Necklace', 'Nightstand', 'Oboe', 'Office building', + 'Orange', 'Organ', 'Ostrich', 'Otter', 'Oven', 'Owl', 'Oyster', 'Paddle', 'Palm tree', 'Pancake', + 'Paper towel', 'Parachute', 'Parrot', 'Pasta', 'Peach', 'Pear', 'Pen', 'Penguin', 'Piano', + 'Picnic basket', 'Picture frame', 'Pig', 'Pillow', 'Pineapple', 'Pitcher', 'Pizza', 'Plastic bag', + 'Plate', 'Platter', 'Polar bear', 'Pomegranate', 'Popcorn', 'Porch', 'Porcupine', 'Poster', + 'Potato', 'Power plugs and sockets', 'Pressure cooker', 'Pretzel', 'Printer', 'Pumpkin', + 'Punching bag', 'Rabbit', 'Raccoon', 'Radish', 'Raven', 'Refrigerator', 'Rhinoceros', 'Rifle', + 'Ring binder', 'Rocket', 'Roller skates', 'Rose', 'Rugby ball', 'Ruler', 'Salad', + 'Salt and pepper shakers', 'Sandal', 'Saucer', 'Saxophone', 'Scarf', 'Scissors', 'Scoreboard', + 'Screwdriver', 'Sea lion', 'Sea turtle', 'Seahorse', 'Seat belt', 'Segway', 'Serving tray', + 'Sewing machine', 'Shark', 'Sheep', 'Shelf', 'Shirt', 'Shorts', 'Shotgun', 'Shower', 'Shrimp', + 'Sink', 'Skateboard', 'Ski', 'Skull', 'Skyscraper', 'Slow cooker', 'Snail', 'Snake', 'Snowboard', + 'Snowman', 'Snowmobile', 'Snowplow', 'Sock', 'Sofa bed', 'Sombrero', 'Sparrow', 'Spatula', + 'Spider', 'Spoon', 'Sports uniform', 'Squirrel', 'Stairs', 'Starfish', 'Stationary bicycle', + 'Stool', 'Stop sign', 'Strawberry', 'Street light', 'Stretcher', 'Studio couch', + 'Submarine sandwich', 'Suit', 'Suitcase', 'Sun hat', 'Sunflower', 'Sunglasses', 'Surfboard', + 'Sushi', 'Swan', 'Swim cap', 'Swimming pool', 'Swimwear', 'Sword', 'Table tennis racket', + 'Tablet computer', 'Taco', 'Tank', 'Tap', 'Tart', 'Taxi', 'Tea', 'Teapot', 'Teddy bear', + 'Television', 'Tennis ball', 'Tennis racket', 'Tent', 'Tiara', 'Tick', 'Tie', 'Tiger', 'Tin can', + 'Tire', 'Toaster', 'Toilet', 'Toilet paper', 'Tomato', 'Torch', 'Tortoise', 'Towel', 'Tower', + 'Traffic light', 'Train', 'Training bench', 'Treadmill', 'Tripod', 'Trombone', 'Truck', + 'Trumpet', 'Turkey', 'Umbrella', 'Van', 'Vase', 'Vehicle registration plate', 'Violin', + 'Volleyball', 'Waffle', 'Wall clock', 'Washing machine', 'Waste container', 'Watch', + 'Watermelon', 
'Whale', 'Wheel', 'Wheelchair', 'Whiteboard', 'Willow', 'Window', + 'Window blind', 'Wine', 'Wine glass', 'Winter melon', 'Wok', 'Woman', 'Wood-burning stove', + 'Woodpecker', 'Wrench', 'Zebra', 'Zucchini'] + + +LEVEL_2_LABELS = ['Toy', 'Home appliance', 'Plumbing fixture', 'Office supplies', 'Tableware', 'Kitchen appliance', + 'Couch', 'Bed', 'Table', 'Clock', 'Sculpture', 'Traffic sign', 'Building', 'Person', 'Dessert', + 'Fruit', 'Shellfish', 'Squash', 'Sandwich', 'Tree', 'Flower', 'Car', 'Boat', 'Aircraft', 'Hat', + 'Skirt', 'Glove', 'Trousers', 'Footwear', 'Luggage and bags', 'Helmet', 'Bird', + 'Marine invertebrates', 'Beetle', 'Moths and butterflies', 'Bear', 'Marine mammal', 'Turtle', + 'Fish', 'Personal care', 'Musical instrument', 'Ball', 'Racket', 'Weapon', 'Telephone', + 'Drink'] + +LEVEL_3_LABELS = ['Seafood', 'Watercraft', 'Insect', 'Carnivore'] + +# Some classes upper to make more than one class for single net +LEVEL_4_LABELS = ['Vegetable', 'Land vehicle', 'Reptile', 'Invertebrate'] + +# Some classes upper to make more than one class for single net +LEVEL_5_LABELS = ['Furniture', 'Vehicle', 'Animal'] + +# Classes with less than 500 samples in train +LEVEL_1_LABELS_LOW_SAMPLES = ['Adhesive tape', 'Alarm clock', 'Ambulance', 'Artichoke', 'Asparagus', 'Bathroom cabinet', + 'Beaker', 'Belt', 'Bidet', 'Binoculars', 'Blender', 'Blue jay', 'Briefcase', 'Burrito', + 'Cabbage', 'Cake stand', 'Canary', 'Ceiling fan', 'Centipede', 'Coffeemaker', 'Common fig', + 'Corded phone', 'Cricket ball', 'Croissant', 'Crutch', 'Cutting board', 'Dagger', + 'Digital clock', 'Dog bed', 'Drinking straw', 'Dumbbell', 'Envelope', 'Filing cabinet', + 'Fire hydrant', 'Flashlight', 'Flute', 'Food processor', 'Frying pan', 'Golf ball', + 'Guacamole', 'Harp', 'Harpsichord', 'Honeycomb', 'Hot dog', 'Infant bed', + 'Kitchen knife', 'Light switch', 'Limousine', 'Lynx', 'Mango', 'Measuring cup', + 'Microwave oven', 'Mixer', 'Nail', 'Oboe', 'Organ', 'Paper towel', 'Picnic basket', + 'Pitcher', 'Popcorn', 'Porcupine', 'Power plugs and sockets', 'Pressure cooker', + 'Pretzel', 'Printer', 'Punching bag', 'Raccoon', 'Ring binder', 'Rugby ball', 'Ruler', + 'Salt and pepper shakers', 'Scissors', 'Screwdriver', 'Seahorse', 'Seat belt', + 'Serving tray', 'Sewing machine', 'Shower', 'Slow cooker', 'Snowmobile', 'Snowplow', + 'Spatula', 'Stationary bicycle', 'Stop sign', 'Stretcher', 'Submarine sandwich', + 'Tiara', 'Tick', 'Toaster', 'Toilet paper', 'Torch', 'Towel', 'Training bench', + 'Treadmill', 'Winter melon', 'Wood-burning stove', 'Wrench'] + + +def save_in_file(arr, file_name): + pickle.dump(arr, gzip.open(file_name, 'wb+', compresslevel=3)) + + +def load_from_file(file_name): + return pickle.load(gzip.open(file_name, 'rb')) + + +def save_in_file_fast(arr, file_name): + pickle.dump(arr, open(file_name, 'wb')) + + +def load_from_file_fast(file_name): + return pickle.load(open(file_name, 'rb')) + + +def show_image(im, name='image'): + cv2.imshow(name, im.astype(np.uint8)) + cv2.waitKey(0) + cv2.destroyAllWindows() + + +def show_resized_image(P, w=1000, h=1000): + res = cv2.resize(P.astype(np.uint8), (w, h), interpolation=cv2.INTER_CUBIC) + show_image(res) + + +def get_date_string(): + return datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + + +def sort_dict_by_values(a, reverse=True): + sorted_x = sorted(a.items(), key=operator.itemgetter(1), reverse=reverse) + return sorted_x + + +def value_counts_for_list(lst): + a = dict(Counter(lst)) + a = sort_dict_by_values(a, True) + return a + + +def 
read_single_image(path): + use_pyvips = False + try: + if not use_pyvips: + img = np.array(Image.open(path)) + else: + # Much faster in case you have pyvips installed (uncomment import pyvips in top of file) + img = pyvips.Image.new_from_file(path, access='sequential') + img = np.ndarray(buffer=img.write_to_memory(), + dtype=np.uint8, + shape=[img.height, img.width, img.bands]) + except: + try: + img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB) + except: + print('Fail') + return None + + if len(img.shape) == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + + if img.shape[2] == 2: + img = img[:, :, :1] + + if img.shape[2] == 1: + img = np.concatenate((img, img, img), axis=2) + + if img.shape[2] > 3: + img = img[:, :, :3] + + return img + + +def get_description_for_labels(): + out = open(INPUT_PATH + 'class-descriptions-boxable.csv') + lines = out.readlines() + ret_1, ret_2 = dict(), dict() + for l in lines: + arr = l.strip().split(',') + ret_1[arr[0]] = arr[1] + ret_2[arr[1]] = arr[0] + return ret_1, ret_2 + + +def read_image_bgr_fast(path): + img2 = read_single_image(path) + img2 = img2[:, :, ::-1] + return img2 + + +def get_subcategories(sub_cat, upper_cat, level, l, d1, sub): + ret = [] + sub_cat[upper_cat] = ([], []) + for j, k in enumerate(l[sub]): + nm = d1[k['LabelName']] + sub_cat[upper_cat][1].append(nm) + if nm in sub_cat: + continue + ret.append(nm) + if 'Subcategory' in k: + get_subcategories(sub_cat, nm, level + 1, l, d1, 'Subcategory') + else: + sub_cat[nm] = ([upper_cat], []) + return ret + + +def get_hierarchy_structures(): + sub_cat = dict() + part_cat = dict() + d1, d2 = get_description_for_labels() + arr = json.load(open(INPUT_PATH + 'bbox_labels_600_hierarchy.json', 'r')) + lst = dict(arr.items())['Subcategory'] + for i, l in enumerate(lst): + nm = d1[l['LabelName']] + if 'Subcategory' in l: + get_subcategories(sub_cat, nm, 1, l, d1, 'Subcategory') + else: + if nm in sub_cat: + print('Strange!') + exit() + sub_cat[nm] = [], [] + return sub_cat + + +def set_parents(parents, name_list, l, d1): + for j, k in enumerate(l['Subcategory']): + nm = d1[k['LabelName']] + parents[nm] += name_list + if 'Subcategory' in k: + set_parents(parents, name_list + [nm], k, d1) + + +def get_parents_labels(): + d1, d2 = get_description_for_labels() + parents = dict() + for r in d2.keys(): + parents[r] = [] + + arr = json.load(open(INPUT_PATH + 'bbox_labels_600_hierarchy.json', 'r')) + lst = dict(arr.items())['Subcategory'] + for i, l in enumerate(lst): + nm = d1[l['LabelName']] + if 'Subcategory' in l: + set_parents(parents, [nm], l, d1) + # print(parents) + for p in parents: + parents[p] = list(set(parents[p])) + return parents + + +def get_description_for_labels_500(): + out = open(INPUT_PATH + 'challenge-2018-class-descriptions-500.csv') + lines = out.readlines() + ret_1, ret_2 = dict(), dict() + for l in lines: + arr = l.strip().split(',') + ret_1[arr[0]] = arr[1] + ret_2[arr[1]] = arr[0] + return ret_1, ret_2 + + +def random_intensity_change1(img, min_change=-20, max_change=20, separate_channel=True): + img = img.astype(np.float32) + delta = random.randint(min_change, max_change) + for j in range(3): + if separate_channel: + delta = random.randint(min_change, max_change) + img[:, :, j] += delta + img[img < 0] = 0 + img[img > 255] = 255 + return img.astype(np.uint8) diff --git a/a01_ensemble_boxes_functions.py b/a01_ensemble_boxes_functions.py new file mode 100644 index 0000000..91f00ef --- /dev/null +++ b/a01_ensemble_boxes_functions.py @@ -0,0 +1,134 @@ +# coding: utf-8 
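+# Box ensembling helpers: standard NMS, IoU computation, and weighted merging
+# of overlapping predictions (used to combine original and mirrored detections).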
+__author__ = 'ZFTurbo: https://kaggle.com/zfturbo' + +import numpy as np + + +def nms_standard(dets, thresh): + scores = dets[:, 0] + x1 = dets[:, 1] + y1 = dets[:, 2] + x2 = dets[:, 3] + y2 = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def bb_intersection_over_union(boxA, boxB): + # determine the (x, y)-coordinates of the intersection rectangle + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + + # compute the area of intersection rectangle + interArea = max(0, xB - xA) * max(0, yB - yA) + + if interArea == 0: + return 0.0 + + # compute the area of both the prediction and ground-truth + # rectangles + boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1]) + boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1]) + + # compute the intersection over union by taking the intersection + # area and dividing it by the sum of prediction + ground-truth + # areas - the interesection area + iou = interArea / float(boxAArea + boxBArea - interArea) + + # return the intersection over union value + return iou + + +def filter_boxes(boxes, scores, labels, thr): + new_boxes = [] + for i in range(boxes.shape[0]): + box = [] + for j in range(boxes.shape[1]): + label = labels[i, j].astype(np.int64) + score = scores[i, j] + if score < thr: + break + # Fix for mirror predictions + if i == 0: + b = [int(label), float(score), float(boxes[i, j, 0]), float(boxes[i, j, 1]), float(boxes[i, j, 2]), float(boxes[i, j, 3])] + else: + b = [int(label), float(score), 1 - float(boxes[i, j, 2]), float(boxes[i, j, 1]), 1 - float(boxes[i, j, 0]), float(boxes[i, j, 3])] + box.append(b) + new_boxes.append(box) + return new_boxes + + +def find_matching_box(boxes_list, new_box, match_iou=0.55): + best_iou = match_iou + best_index = -1 + for i in range(len(boxes_list)): + box = boxes_list[i] + if box[0] != new_box[0]: + continue + iou = bb_intersection_over_union(box[2:], new_box[2:]) + if iou > best_iou: + best_index = i + best_iou = iou + + return best_index, best_iou + + +def merge_boxes_weighted(box1, box2, w1, w2, type): + box = [-1, -1, -1, -1, -1, -1] + box[0] = box1[0] + if type == 'avg': + box[1] = ((w1 * box1[1]) + (w2 * box2[1])) / (w1 + w2) + elif type == 'max': + box[1] = max(box1[1], box2[1]) + elif type == 'mul': + box[1] = np.sqrt(box1[1]*box2[1]) + else: + exit() + box[2] = (w1*box1[2] + w2*box2[2]) / (w1 + w2) + box[3] = (w1*box1[3] + w2*box2[3]) / (w1 + w2) + box[4] = (w1*box1[4] + w2*box2[4]) / (w1 + w2) + box[5] = (w1*box1[5] + w2*box2[5]) / (w1 + w2) + return box + + +def merge_all_boxes_for_image(boxes, intersection_thr=0.55, type='avg'): + + new_boxes = boxes[0].copy() + init_weight = 1/len(boxes) + weights = [init_weight] * len(new_boxes) + + for j in range(1, len(boxes)): + for k in range(len(boxes[j])): + index, best_iou = find_matching_box(new_boxes, boxes[j][k], intersection_thr) + if index != -1: + new_boxes[index] = merge_boxes_weighted(new_boxes[index], boxes[j][k], weights[index], init_weight, type) + weights[index] += init_weight + else: + 
new_boxes.append(boxes[j][k]) + weights.append(init_weight) + + for i in range(len(new_boxes)): + new_boxes[i][1] *= weights[i] + return np.array(new_boxes) diff --git a/create_files_for_training_by_levels.py b/create_files_for_training_by_levels.py new file mode 100644 index 0000000..5643191 --- /dev/null +++ b/create_files_for_training_by_levels.py @@ -0,0 +1,488 @@ +# coding: utf-8 +__author__ = 'ZFTurbo: https://kaggle.com/zfturbo' + + +from a00_utils_and_constants import * + + +def get_empty_df(negative_samples): + neg_samp = pd.DataFrame(negative_samples, columns=['ImageID']) + neg_samp['Source'] = 'freeform' + neg_samp['LabelName'] = '' + neg_samp['Confidence'] = 1.0 + neg_samp['XMin'] = '' + neg_samp['XMax'] = '' + neg_samp['YMin'] = '' + neg_samp['YMax'] = '' + neg_samp['IsOccluded'] = 0 + neg_samp['IsTruncated'] = 0 + neg_samp['IsGroupOf'] = 0 + neg_samp['IsDepiction'] = 0 + neg_samp['IsInside'] = 0 + return neg_samp + + +def create_level1_files(): + out_dir = OUTPUT_PATH + 'level_1_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + remove_group_of = True + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-1.csv', 'w') + for l in LEVEL_1_LABELS: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + negative_sample_classes = [d2[f] for f in ['Armadillo', 'Axe', 'Balance beam', 'Band-aid', 'Banjo', 'Bomb', 'Bottle opener', + 'Bowling equipment', 'Calculator', 'Can opener', 'Cantaloupe', 'Cassette deck', + 'Cat furniture', 'Chainsaw', 'Cheese', 'Chime', 'Chisel', 'Closet', + 'Cocktail shaker', 'Cooking spray', 'Cream', 'Diaper', 'Dishwasher', 'Drill', + 'Eraser', 'Face powder', 'Facial tissue holder', 'Fax', 'Flying disc', 'Grinder', + 'Hair dryer', 'Hair spray', 'Hammer', 'Hand dryer', 'Harmonica', 'Heater', + 'Hedgehog', 'Hiking equipment', 'Hippopotamus', 'Horizontal bar', 'Human body', + 'Humidifier', 'Indoor rower', 'Ipod', 'Isopod', 'Jacuzzi', 'Koala', 'Ladle', + 'Lipstick', 'Magpie', 'Maracas', 'Milk', 'Mixing bowl', 'Panda', 'Paper cutter', + 'Parking meter', 'Pencil case', 'Pencil sharpener', 'Perfume', 'Pizza cutter', + 'Ratchet', 'Rays and skates', 'Red panda', 'Remote control', 'Scale', 'Scorpion', + 'Skunk', 'Soap dispenser', 'Spice rack', 'Squid', 'Stapler', 'Stethoscope', + 'Submarine', 'Syringe', 'Toothbrush', 'Tree house', 'Unicycle', 'Waffle iron', + 'Wardrobe', 'Whisk', 'Wine rack', 'Worm']] + not_negative = [d2[f] for f in LEVEL_2_LABELS + LEVEL_3_LABELS + LEVEL_4_LABELS + LEVEL_5_LABELS] + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + + # Remove Group Of boxes! 
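+        # An IsGroupOf box is a single box drawn over a group of several objects
+        # of the same class rather than one instance, so such boxes are dropped.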
+ if remove_group_of: + boxes = boxes[boxes['IsGroupOf'] == 0].copy() + print(len(boxes)) + + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)] + print(len(reduced_boxes)) + + negative_classes = boxes[boxes['LabelName'].isin(negative_sample_classes)]['ImageID'].unique() + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list(set(negative_classes) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-1.csv', index=False) + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/train-annotations-bbox.csv') + print(len(boxes)) + + # Remove Group Of boxes! + if remove_group_of: + boxes = boxes[boxes['IsGroupOf'] == 0].copy() + print(len(boxes)) + + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)] + print(len(reduced_boxes)) + + negative_classes = boxes[boxes['LabelName'].isin(negative_sample_classes)]['ImageID'].unique() + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list(set(negative_classes) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-1.csv', index=False) + + +def create_level2_files(): + out_dir = OUTPUT_PATH + 'level_2_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + remove_group_of = True + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-2.csv', 'w') + for l in LEVEL_2_LABELS: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + parents = get_parents_labels() + print(parents) + lvl2_specific_labels = ['Flying disc', 'Heater', 'Hair dryer', 'Humidifier', 'Dishwasher', 'Hand dryer', + 'Calculator', 'Stapler', 'Pencil sharpener', 'Eraser', 'Fax', 'Pencil case', 'Paper cutter', + 'Mixing bowl', 'Cocktail shaker', 'Waffle iron', 'Dishwasher', 'Tree house', 'Cantaloupe', + 'Magpie', 'Isopod', 'Squid', 'Panda', 'Rays and skates', 'Toothbrush', 'Cream', 'Diaper', + 'Banjo', 'Harmonica', 'Chime', 'Maracas', 'Axe', 'Bomb'] + not_negative = [d2[f] for f in LEVEL_3_LABELS + LEVEL_4_LABELS + LEVEL_5_LABELS] + list_of_childs = LEVEL_1_LABELS + lvl2_specific_labels + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + # Remove Group Of boxes! 
+ if remove_group_of: + reduced_boxes = reduced_boxes[reduced_boxes['IsGroupOf'] == 0].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_2_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list( + set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-2.csv', index=False) + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/train-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + # Remove Group Of boxes! + if remove_group_of: + reduced_boxes = reduced_boxes[reduced_boxes['IsGroupOf'] == 0].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_2_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list( + set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-2.csv', index=False) + + +def create_level3_files(): + out_dir = OUTPUT_PATH + 'level_3_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-3.csv', 'w') + for l in LEVEL_3_LABELS: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + parents = get_parents_labels() + print(parents) + lvl3_specific_labels = ['Squid', 'Submarine', 'Panda', 'Red panda'] + not_negative = [d2[f] for f in LEVEL_4_LABELS + LEVEL_5_LABELS] + list_of_childs = LEVEL_1_LABELS + LEVEL_2_LABELS + lvl3_specific_labels + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_3_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + 
additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list( + set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-3.csv', index=False) + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/train-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_3_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list( + set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-3.csv', index=False) + + +def create_level4_files(): + out_dir = OUTPUT_PATH + 'level_4_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-4.csv', 'w') + for l in LEVEL_4_LABELS: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + parents = get_parents_labels() + print(parents) + lvl4_specific_labels = ['Unicycle', 'Isopod', 'Squid', 'Scorpion', 'Worm'] + not_negative = [d2[f] for f in LEVEL_5_LABELS] + list_of_childs = LEVEL_1_LABELS + LEVEL_2_LABELS + LEVEL_3_LABELS + lvl4_specific_labels + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_4_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list(set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-4.csv', index=False) + + if 1: + boxes = pd.read_csv(DATASET_PATH + 
'annotations/train-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_4_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list( + set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-4.csv', index=False) + + +def create_level5_files(): + out_dir = OUTPUT_PATH + 'level_5_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-5.csv', 'w') + for l in LEVEL_5_LABELS: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + parents = get_parents_labels() + print(parents) + lvl5_specific_labels = ['Wine rack', 'Wardrobe', 'Closet', 'Unicycle', 'Submarine', 'Magpie', 'Isopod', 'Squid', + 'Scorpion', 'Worm', 'Mammal', 'Panda', 'Red panda', 'Koala', 'Hippopotamus', 'Hedgehog', + 'Skunk', 'Armadillo', 'Rays and skates'] + list_of_childs = LEVEL_1_LABELS + LEVEL_2_LABELS + LEVEL_3_LABELS + LEVEL_4_LABELS + lvl5_specific_labels + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_5_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + negative_samples = list(set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique())) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-5.csv', index=False) + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/train-annotations-bbox.csv') + print(len(boxes)) + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)].copy() + print(len(reduced_boxes)) + + parts_list = [] + for lvl1 in list_of_childs: + for p in parents[lvl1]: + if p in LEVEL_5_LABELS: + print('{} - {} ({})'.format(p, lvl1, d2[lvl1])) + small_part = boxes[boxes['LabelName'] == d2[lvl1]].copy() + small_part['LabelName'] = d2[p] + parts_list.append(small_part) + print(len(small_part)) + reduced_boxes = pd.concat([reduced_boxes] + parts_list, axis=0) + negative_samples = list(set(boxes['ImageID'].unique()) - set(reduced_boxes['ImageID'].unique())) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + 
reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-5.csv', index=False) + + +def create_level1_low_samples_files(): + out_dir = OUTPUT_PATH + 'level_1_low_samples_files/' + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + remove_group_of = True + + labels_to_find = [] + d1, d2 = get_description_for_labels() + out = open(out_dir + 'class-descriptions-boxable-level-1.csv', 'w') + for l in LEVEL_1_LABELS_LOW_SAMPLES: + out.write("{},{}\n".format(d2[l], l)) + labels_to_find.append(d2[l]) + out.close() + + neg_samp_l1 = list(set(LEVEL_1_LABELS) - set(LEVEL_1_LABELS_LOW_SAMPLES)) + print(len(neg_samp_l1)) + + negative_sample_classes = [d2[f] for f in ['Armadillo', 'Axe', 'Balance beam', 'Band-aid', 'Banjo', 'Bomb', 'Bottle opener', + 'Bowling equipment', 'Calculator', 'Can opener', 'Cantaloupe', 'Cassette deck', + 'Cat furniture', 'Chainsaw', 'Cheese', 'Chime', 'Chisel', 'Closet', + 'Cocktail shaker', 'Cooking spray', 'Cream', 'Diaper', 'Dishwasher', 'Drill', + 'Eraser', 'Face powder', 'Facial tissue holder', 'Fax', 'Flying disc', 'Grinder', + 'Hair dryer', 'Hair spray', 'Hammer', 'Hand dryer', 'Harmonica', 'Heater', + 'Hedgehog', 'Hiking equipment', 'Hippopotamus', 'Horizontal bar', 'Human body', + 'Humidifier', 'Indoor rower', 'Ipod', 'Isopod', 'Jacuzzi', 'Koala', 'Ladle', + 'Lipstick', 'Magpie', 'Maracas', 'Milk', 'Mixing bowl', 'Panda', 'Paper cutter', + 'Parking meter', 'Pencil case', 'Pencil sharpener', 'Perfume', 'Pizza cutter', + 'Ratchet', 'Rays and skates', 'Red panda', 'Remote control', 'Scale', 'Scorpion', + 'Skunk', 'Soap dispenser', 'Spice rack', 'Squid', 'Stapler', 'Stethoscope', + 'Submarine', 'Syringe', 'Toothbrush', 'Tree house', 'Unicycle', 'Waffle iron', + 'Wardrobe', 'Whisk', 'Wine rack', 'Worm'] + neg_samp_l1] + not_negative = [d2[f] for f in LEVEL_2_LABELS + LEVEL_3_LABELS + LEVEL_4_LABELS + LEVEL_5_LABELS] + + if 1: + boxes = pd.read_csv(DATASET_PATH + 'annotations/validation-annotations-bbox.csv') + print(len(boxes)) + + # Remove Group Of boxes! + if remove_group_of: + boxes = boxes[boxes['IsGroupOf'] == 0].copy() + print(len(boxes)) + + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)] + print(len(reduced_boxes)) + + negative_classes = boxes[boxes['LabelName'].isin(negative_sample_classes)]['ImageID'].unique() + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list(set(negative_classes) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'validation-annotations-bbox-level-1.csv', index=False) + + if remove_group_of: + boxes = pd.read_csv(DATASET_PATH + 'annotations/train-annotations-bbox.csv') + print(len(boxes)) + + # Remove Group Of boxes! 
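+        # Same group-of filtering as for the validation annotations above.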
+ if 1: + boxes = boxes[boxes['IsGroupOf'] == 0].copy() + print(len(boxes)) + + reduced_boxes = boxes[boxes['LabelName'].isin(labels_to_find)] + print(len(reduced_boxes)) + + negative_classes = boxes[boxes['LabelName'].isin(negative_sample_classes)]['ImageID'].unique() + additional_remove = boxes[boxes['LabelName'].isin(not_negative)]['ImageID'].unique() + print('Additional images to remove: {}'.format(len(additional_remove))) + negative_samples = list(set(negative_classes) - set(reduced_boxes['ImageID'].unique()) - set(additional_remove)) + print('Length of negative samples: {}'.format(len(negative_samples))) + neg_samp = get_empty_df(negative_samples) + + reduced_boxes = pd.concat([reduced_boxes, neg_samp], axis=0) + reduced_boxes.to_csv(out_dir + 'train-annotations-bbox-level-1.csv', index=False) + + +if __name__ == '__main__': + create_level1_files() + if 0: + create_level2_files() + create_level3_files() + create_level4_files() + create_level5_files() + create_level1_low_samples_files() diff --git a/create_higher_level_predictions_from_level_1_predictions_csv.py b/create_higher_level_predictions_from_level_1_predictions_csv.py new file mode 100644 index 0000000..e2c10a7 --- /dev/null +++ b/create_higher_level_predictions_from_level_1_predictions_csv.py @@ -0,0 +1,89 @@ +# coding: utf-8 +__author__ = 'ZFTurbo: https://kaggle.com/zfturbo' + + +from a00_utils_and_constants import * +from a01_ensemble_boxes_functions import * + + +def extend_boxes(boxes, d1, d2, parents, return_only_new=False): + intersection_thr = 0.75 + print('Initial boxes: {}'.format(boxes.shape)) + + # Add all parents boxes + new_boxes = [] + for i in range(boxes.shape[0]): + class_name = d1[boxes[i][0]] + for p in parents[class_name]: + if p in d2: + new_boxes.append(np.array([d2[p]] + list(boxes[i][1:]))) + new_boxes = np.array(new_boxes) + + if len(new_boxes) > 0: + # Filter them with NMS + unique_labels = np.unique(new_boxes[:, 0]) + # print('Unique parents [{}]: {}'.format(len(unique_labels), [d1[x] for x in unique_labels])) + keep_boxes = [] + for u in unique_labels: + part_boxes = new_boxes[new_boxes[:, 0] == u].copy() + kp = nms_standard(part_boxes[:, 1:].astype(np.float64).copy(), intersection_thr) + keep_boxes.append(part_boxes[kp].copy()) + merged_boxes = np.concatenate(keep_boxes, axis=0) + else: + merged_boxes = new_boxes.copy() + print('Found parent boxes: {} Reduced with NMS: {}'.format(len(new_boxes), len(merged_boxes))) + + # Concat with older + if return_only_new is False: + if len(merged_boxes) > 0: + new_boxes = np.concatenate((boxes, merged_boxes), axis=0) + else: + new_boxes = boxes.copy() + else: + new_boxes = merged_boxes.copy() + print('Total boxes: {}'.format(new_boxes.shape)) + return new_boxes + + +def flatten_boxes(boxes): + s = '' + for i in range(boxes.shape[0]): + for j in range(boxes.shape[1]): + s += str(boxes[i, j]) + ' ' + return s + + +def create_higher_level_classes_from_csv(input_subm, out_file, return_only_new=False): + d1, d2 = get_description_for_labels_500() + parents = get_parents_labels() + + subm = pd.read_csv(input_subm) + ids = subm['ImageId'].values + preds = subm['PredictionString'].values + preds_modified = [] + for i in range(len(ids)): + print('Go for {}'.format(ids[i])) + id = ids[i] + if str(preds[i]) == 'nan': + preds_modified.append('') + continue + arr = preds[i].strip().split(' ') + if len(arr) % 6 != 0: + print('Some problem here! 
{}'.format(id)) + exit() + boxes = [] + for j in range(0, len(arr), 6): + part = arr[j:j + 6] + boxes.append(part) + boxes = np.array(boxes) + new_boxes = extend_boxes(boxes, d1, d2, parents, return_only_new) + box_str = flatten_boxes(new_boxes) + preds_modified.append(box_str) + subm['PredictionString'] = preds_modified + subm.to_csv(out_file, index=False) + + +if __name__ == '__main__': + create_higher_level_classes_from_csv(SUBM_PATH + 'retinanet_training_level_1.csv', + SUBM_PATH + 'retinanet_level_1_all_levels.csv', + return_only_new=True) diff --git a/retinanet_inference_example.py b/retinanet_inference_example.py new file mode 100644 index 0000000..162b913 --- /dev/null +++ b/retinanet_inference_example.py @@ -0,0 +1,197 @@ +# coding: utf-8 +__author__ = 'ZFTurbo: https://kaggle.com/zfturbo' + + +if __name__ == '__main__': + import os + gpu_use = 0 + print('GPU use: {}'.format(gpu_use)) + os.environ["KERAS_BACKEND"] = "tensorflow" + os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu_use) + + +from a00_utils_and_constants import * +from a01_ensemble_boxes_functions import * + + +def show_image_debug(id_to_labels, draw, boxes, scores, labels): + from keras_retinanet.utils.visualization import draw_box, draw_caption + from keras_retinanet.utils.colors import label_color + + # visualize detections + for box, score, label in zip(boxes[0], scores[0], labels[0]): + # scores are sorted so we can break + if score < 0.3: + break + + color = (0, 255, 0) + + b = box.astype(int) + draw_box(draw, b, color=color) + + caption = "{} {:.3f}".format(id_to_labels[label], score) + draw_caption(draw, b, caption) + draw = cv2.cvtColor(draw, cv2.COLOR_RGB2BGR) + show_image(draw) + + +def get_retinanet_predictions_for_files(files, out_dir, pretrained_model_path, backbone): + from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image + from keras_retinanet import models + + show_debug_images = False + show_mirror_predictions = False + + model = models.load_model(pretrained_model_path, backbone_name=backbone) + print('Proc {} files...'.format(len(files))) + for f in files: + id = os.path.basename(f)[:-4] + + cache_path = out_dir + id + '.pkl' + if os.path.isfile(cache_path): + continue + + # try: + image = read_image_bgr_fast(f) + + if show_debug_images: + # copy to draw on + draw = image.copy() + draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB) + + # preprocess image for network + image = preprocess_image(image) + if backbone == 'resnet152': + image, scale = resize_image(image, min_side=600, max_side=800) + elif backbone == 'resnet101': + image, scale = resize_image(image, min_side=768, max_side=1024) + + # Add mirror + image = np.stack((image, image[:, ::-1, :]), axis=0) + + # process image + start = time.time() + print('ID: {} Image shape: {} Scale: {}'.format(id, image.shape, scale)) + boxes, scores, labels = model.predict_on_batch(image) + print('Detections shape: {} {} {}'.format(boxes.shape, scores.shape, labels.shape)) + print("Processing time: {:.2f} sec".format(time.time() - start)) + + if show_debug_images: + if show_mirror_predictions: + draw = draw[:, ::-1, :] + boxes_init = boxes.copy() + boxes_init /= scale + + boxes[:, :, 0] /= image.shape[2] + boxes[:, :, 2] /= image.shape[2] + boxes[:, :, 1] /= image.shape[1] + boxes[:, :, 3] /= image.shape[1] + + if show_debug_images: + if show_mirror_predictions: + show_image_debug(LEVEL_1_LABELS, draw.astype(np.uint8), boxes_init[1:], scores[1:], labels[1:]) + else: + show_image_debug(LEVEL_1_LABELS, draw.astype(np.uint8), 
boxes_init[:1], scores[:1], labels[:1]) + + save_in_file_fast((boxes, scores, labels), cache_path) + + +def create_csv_for_retinanet(input_dir, out_file, label_arr, skip_box_thr=0.05, intersection_thr=0.55, limit_boxes=300, type='avg'): + out = open(out_file, 'w') + out.write('ImageId,PredictionString\n') + d1, d2 = get_description_for_labels() + files = glob.glob(input_dir + '*.pkl') + for f in files: + id = os.path.basename(f)[:-4] + boxes, scores, labels = load_from_file_fast(f) + filtered_boxes = filter_boxes(boxes, scores, labels, skip_box_thr) + merged_boxes = merge_all_boxes_for_image(filtered_boxes, intersection_thr, type) + print(id, len(filtered_boxes[0]), len(filtered_boxes[1]), len(merged_boxes)) + if len(merged_boxes) > limit_boxes: + # sort by score + merged_boxes = np.array(merged_boxes) + merged_boxes = merged_boxes[merged_boxes[:, 1].argsort()[::-1]][:limit_boxes] + + out.write(id + ',') + for i in range(len(merged_boxes)): + label = int(merged_boxes[i][0]) + score = merged_boxes[i][1] + b = merged_boxes[i][2:] + + google_name = label_arr[label] + if '/' not in google_name: + google_name = d2[google_name] + + xmin = b[0] + if xmin < 0: + xmin = 0 + if xmin > 1: + xmin = 1 + + xmax = b[2] + if xmax < 0: + xmax = 0 + if xmax > 1: + xmax = 1 + + ymin = b[1] + if ymin < 0: + ymin = 0 + if ymin > 1: + ymin = 1 + + ymax = b[3] + if ymax < 0: + ymax = 0 + if ymax > 1: + ymax = 1 + + if (xmax < xmin): + print('X min value larger than max value {}: {} {}'.format(label_arr[label], xmin, xmax)) + continue + + if (ymax < ymin): + print('Y min value larger than max value {}: {} {}'.format(label_arr[label], ymin, ymax)) + continue + + if abs(xmax - xmin) < 1e-5: + print('Too small diff for {}: {} and {}'.format(label_arr[label], xmin, xmax)) + continue + + if abs(ymax - ymin) < 1e-5: + print('Too small diff for {}: {} and {}'.format(label_arr[label], ymin, ymax)) + continue + + str1 = "{} {:.6f} {:.4f} {:.4f} {:.4f} {:.4f} ".format(google_name, score, xmin, ymin, xmax, ymax) + out.write(str1) + out.write('\n') + + +if __name__ == '__main__': + skip_box_confidence = 0.01 + iou_thr = 0.55 + limit_boxes_per_image = 300 + type = 'avg' + + output_cache_directory = OUTPUT_PATH + 'cache_retinanet_level_1/' + if not os.path.isdir(output_cache_directory): + os.mkdir(output_cache_directory) + + # files_to_process = glob.glob(INPUT_PATH + 'kaggle/challenge2018_test/*.jpg') + files_to_process = glob.glob(DATASET_PATH + 'validation_big/*.jpg') + + if 1: + backbone = 'resnet101' + pretrained_model_path = MODELS_PATH + 'retinanet_resnet101_level_1_converted.h5' + labels_list = LEVEL_1_LABELS + + if 0: + backbone = 'resnet152' + pretrained_model_path = MODELS_PATH + 'retinanet_resnet152_level_1_converted.h5' + labels_list = LEVEL_1_LABELS + + get_retinanet_predictions_for_files(files_to_process, output_cache_directory, pretrained_model_path, backbone) + create_csv_for_retinanet(output_cache_directory, + SUBM_PATH + 'predictions_{}_{}_{}.csv'.format(skip_box_confidence, iou_thr, type), + labels_list, + skip_box_confidence, iou_thr, limit_boxes_per_image, type=type) \ No newline at end of file diff --git a/retinanet_training_level_1/callbacks/eval.py b/retinanet_training_level_1/callbacks/eval.py new file mode 100644 index 0000000..22d3957 --- /dev/null +++ b/retinanet_training_level_1/callbacks/eval.py @@ -0,0 +1,93 @@ +""" +Copyright 2017-2018 Fizyr (https://fizyr.com) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import keras +from keras_retinanet.utils.eval import evaluate + + +class Evaluate(keras.callbacks.Callback): + """ Evaluation callback for arbitrary datasets. + """ + + def __init__(self, generator, iou_threshold=0.5, score_threshold=0.05, max_detections=100, save_path=None, tensorboard=None, save_map_path=None, verbose=1): + """ Evaluate a given dataset using a given model at the end of every epoch during training. + + # Arguments + generator : The generator that represents the dataset to evaluate. + iou_threshold : The threshold used to consider when a detection is positive or negative. + score_threshold : The score confidence threshold to use for detections. + max_detections : The maximum number of detections to use per image. + save_path : The path to save images with visualized detections to. + tensorboard : Instance of keras.callbacks.TensorBoard used to log the mAP value. + verbose : Set the verbosity level, by default this is set to 1. + """ + self.generator = generator + self.iou_threshold = iou_threshold + self.score_threshold = score_threshold + self.max_detections = max_detections + self.save_path = save_path + self.tensorboard = tensorboard + self.verbose = verbose + self.save_map_path = save_map_path + + super(Evaluate, self).__init__() + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + + # run evaluation + average_precisions = evaluate( + self.generator, + self.model, + iou_threshold=self.iou_threshold, + score_threshold=self.score_threshold, + max_detections=self.max_detections, + save_path=self.save_path + ) + + # compute per class average precision + present_classes = 0 + precision = 0 + for label, (average_precision, num_annotations ) in average_precisions.items(): + if self.verbose == 1: + print('{:.0f} instances of class'.format(num_annotations), + self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) + if self.save_map_path is not None: + out = open(self.save_map_path, 'a') + out.write('{:.0f} instances of class {} with average precision: {:.4f}\n'.format(num_annotations, self.generator.label_to_name(label), average_precision)) + out.close() + if num_annotations > 0: + present_classes += 1 + precision += average_precision + self.mean_ap = precision / present_classes + + if self.tensorboard is not None and self.tensorboard.writer is not None: + import tensorflow as tf + summary = tf.Summary() + summary_value = summary.value.add() + summary_value.simple_value = self.mean_ap + summary_value.tag = "mAP" + self.tensorboard.writer.add_summary(summary, epoch) + + logs['mAP'] = self.mean_ap + + if self.save_map_path is not None: + out = open(self.save_map_path, 'a') + out.write('Ep {}: mAP: {:.4f}\n'.format(epoch + 1, self.mean_ap)) + out.close() + + if self.verbose == 1: + print('mAP: {:.4f}'.format(self.mean_ap)) diff --git a/retinanet_training_level_1/convert_retinanet_model.py b/retinanet_training_level_1/convert_retinanet_model.py new file mode 100644 index 0000000..f1c9945 --- /dev/null +++ b/retinanet_training_level_1/convert_retinanet_model.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +""" +Copyright 
2017-2018 Fizyr (https://fizyr.com) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +if __name__ == '__main__': + import os + gpu_use = 0 + print('GPU use: {}'.format(gpu_use)) + os.environ["KERAS_BACKEND"] = "tensorflow" + os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu_use) + + +import argparse +import sys + + +# Change these to absolute imports if you copy this script outside the keras_retinanet package. +from keras_retinanet import models + + +def parse_args(args): + parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.') + + parser.add_argument('model_in', help='The model to convert.') + parser.add_argument('model_out', help='Path to save the converted model to.') + parser.add_argument('--backbone', help='The backbone of the model to convert.', default='resnet50') + parser.add_argument('--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false') + parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', dest='class_specific_filter', action='store_false') + + return parser.parse_args(args) + + +def main(args=None): + # parse arguments + if args is None: + args = sys.argv[1:] + args = parse_args(args) + + # load and convert model + model = models.load_model(args.model_in, convert=True, backbone_name=args.backbone, nms=args.nms, class_specific_filter=args.class_specific_filter) + + # save model + model.save(args.model_out) + + +if __name__ == '__main__': + params = [ + '../retinanet_level_1/snapshots/resnet101_oid_274.h5', + '../retinanet_level_1/snapshots/resnet101_oid_274_converted.h5', + '--backbone', 'resnet101' + ] + params = [ + '../retinanet_level_1/snapshots/resnet152_oid_262.h5', + '../retinanet_level_1/snapshots/resnet152_oid_262_converted.h5', + '--backbone', 'resnet152' + ] + main(params) diff --git a/retinanet_training_level_1/find_image_parameters.py b/retinanet_training_level_1/find_image_parameters.py new file mode 100644 index 0000000..f296ff1 --- /dev/null +++ b/retinanet_training_level_1/find_image_parameters.py @@ -0,0 +1,76 @@ +# coding: utf-8 +__author__ = 'ZFTurbo: https://kaggle.com/zfturbo' + + +from a00_utils_and_constants import * +from hashlib import md5 + + +STORAGE_PATH_TRAIN = DATASET_PATH + 'train/' +STORAGE_PATH_TEST = DATASET_PATH + 'test/' +STORAGE_PATH_VALID = DATASET_PATH + 'validation_big/' +STORAGE_PATH_KAGGLE_TEST = INPUT_PATH + 'kaggle/challenge2018_test/' + + +def get_md5(fname): + hash_md5 = md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + +def get_shape(f): + try: + img = pyvips.Image.new_from_file(f, access='sequential') + shape = (img.height, img.width, img.bands) + except: + try: + img = np.array(Image.open(f)) + shape = img.shape + except: + try: + img = cv2.imread(f) + shape = img.shape + except: + shape = (0, 0, 0) + return shape + + +def get_image_stat(type): + out_file = OUTPUT_PATH + '{}_image_params.csv'.format(type) + out = 
open(out_file, 'w')
+    out.write('id,width,height,channel,size,md5\n')
+    if type == 'train':
+        files = glob.glob(STORAGE_PATH_TRAIN + '*/*.jpg')
+    elif type == 'test':
+        files = glob.glob(STORAGE_PATH_TEST + '*.jpg')
+    elif type == 'validation':
+        files = glob.glob(STORAGE_PATH_VALID + '*.jpg')
+    elif type == 'kaggle_test':
+        files = glob.glob(STORAGE_PATH_KAGGLE_TEST + '*.jpg')
+    else:
+        raise ValueError('Unknown dataset type: {}'.format(type))
+    for f in files:
+        id = os.path.basename(f)[:-4]
+        print('Go for {}'.format(id))
+        h, w, c = get_shape(f)
+        m = get_md5(f)
+        sz = os.path.getsize(f)
+        out.write(id)
+        out.write(',' + str(w))
+        out.write(',' + str(h))
+        out.write(',' + str(c))
+        out.write(',' + str(sz))
+        out.write(',' + str(m))
+        out.write('\n')
+    out.close()
+
+
+if __name__ == '__main__':
+    try:
+        import pyvips
+    except ImportError:
+        print('PYVips not available. Image parameters detection will be slow!')
+    get_image_stat('validation')
+    get_image_stat('test')
+    get_image_stat('train')
+    get_image_stat('kaggle_test')
\ No newline at end of file
diff --git a/retinanet_training_level_1/open_images_generator.py b/retinanet_training_level_1/open_images_generator.py
new file mode 100644
index 0000000..af03063
--- /dev/null
+++ b/retinanet_training_level_1/open_images_generator.py
@@ -0,0 +1,374 @@
+"""
+Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
+""" + +import csv +import json +import os +import warnings +import random +import numpy as np + +from keras_retinanet.preprocessing.generator import Generator +# from keras_retinanet.utils.image import read_image_bgr +from a00_utils_and_constants import read_image_bgr_fast, OUTPUT_PATH, random_intensity_change1 + + +def get_labels(metadata_dir, version='v4'): + csv_file = 'class-descriptions-boxable-level-1.csv' + + boxable_classes_descriptions = os.path.join(metadata_dir, csv_file) + id_to_labels = {} + cls_index = {} + + i = 0 + with open(boxable_classes_descriptions) as f: + for row in csv.reader(f): + # make sure the csv row is not empty (usually the last one) + if len(row): + label = row[0] + description = row[1].replace("\"", "").replace("'", "").replace('`', '') + id_to_labels[i] = description + cls_index[label] = i + i += 1 + + return id_to_labels, cls_index + + +def get_image_sizes(subset): + import pandas as pd + sizes = pd.read_csv(OUTPUT_PATH + subset + '_image_params.csv') + ret = dict() + ids = sizes['id'].values + ws = sizes['width'].values + ht = sizes['height'].values + for i in range(len(ids)): + ret[ids[i]] = (int(ws[i]), int(ht[i])) + return ret + + +def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'): + validation_image_ids = {} + + if version == 'v4': + annotations_path = os.path.join(metadata_dir, '{}-annotations-bbox-level-1.csv'.format(subset)) + elif version == 'challenge2018': + validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv') + + with open(validation_image_ids_path, 'r') as csv_file: + reader = csv.DictReader(csv_file, fieldnames=['ImageID']) + reader.next() + for line, row in enumerate(reader): + image_id = row['ImageID'] + validation_image_ids[image_id] = True + + annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv') + else: + annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv') + + fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence', + 'XMin', 'XMax', 'YMin', 'YMax', + 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside'] + + id_annotations = dict() + with open(annotations_path, 'r') as csv_file: + reader = csv.DictReader(csv_file, fieldnames=fieldnames) + next(reader) + + images_sizes = get_image_sizes(subset) + for line, row in enumerate(reader): + frame = row['ImageID'] + img_id = row['ImageID'] + class_name = row['LabelName'] + + if version == 'challenge2018': + if subset == 'train': + if frame in validation_image_ids: + continue + elif subset == 'validation': + if frame not in validation_image_ids: + continue + else: + raise NotImplementedError('This generator handles only the train and validation subsets') + + if version == 'challenge2018': + # We recommend participants to use the provided subset of the training set as a validation set. + # This is preferable over using the V4 val/test sets, as the training set is more densely annotated. 
+ img_path = os.path.join(main_dir, 'train', frame[:3], frame + '.jpg') + else: + if subset == 'validation': + img_path = os.path.join(main_dir, 'validation', frame + '.jpg') + else: + img_path = os.path.join(main_dir, subset, frame[:3], frame + '.jpg') + + if not os.path.isfile(img_path): + continue + + try: + width, height = images_sizes[frame] + except: + print('Image read error: {}'.format(frame)) + continue + + if class_name == '': + if img_id in id_annotations: + print('Strange duplicate {}'.format(img_id)) + exit() + id_annotations[img_id] = {'w': width, 'h': height, 'boxes': []} + continue + + if class_name not in cls_index: + continue + + cls_id = cls_index[class_name] + + x1 = float(row['XMin']) + x2 = float(row['XMax']) + y1 = float(row['YMin']) + y2 = float(row['YMax']) + + x1_int = int(round(x1 * width)) + x2_int = int(round(x2 * width)) + y1_int = int(round(y1 * height)) + y2_int = int(round(y2 * height)) + + # Check that the bounding box is valid. + if x2 <= x1: + raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) + if y2 <= y1: + raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) + + if y2_int == y1_int: + warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1)) + continue + + if x2_int == x1_int: + warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1)) + continue + + annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2} + + if img_id in id_annotations: + annotations = id_annotations[img_id] + annotations['boxes'].append(annotation) + else: + id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]} + return id_annotations + + +def get_class_index_arrays(number_of_classes, annotation_cache_json): + classes = dict() + f = open(annotation_cache_json, 'r') + annotations = json.loads(f.read()) + + classes['empty'] = [] + for c in range(number_of_classes): + classes[c] = [] + + for id in annotations: + if 'boxes' in annotations[id]: + if len(annotations[id]['boxes']) == 0: + classes['empty'].append(id) + for box in annotations[id]['boxes']: + c = box['cls_id'] + classes[c].append(id) + + return classes + + +class OpenImagesGenerator(Generator): + def __init__( + self, main_dir, subset, version='v4', + labels_filter=None, annotation_cache_dir='.', + fixed_labels=False, + **kwargs + ): + + if subset == 'validation': + self.base_dir = os.path.join(main_dir, 'images', 'validation') + else: + self.base_dir = os.path.join(main_dir, 'images', subset) + + metadata_dir = OUTPUT_PATH + 'level_1_files/' + annotation_cache_json = os.path.join(metadata_dir, subset + '_level_1.json') + + self.id_to_labels, cls_index = get_labels(metadata_dir, version=version) + print('Labels length: {}'.format(len(cls_index))) + # print(self.id_to_labels) + # print(cls_index) + # exit() + + if os.path.exists(annotation_cache_json): + with open(annotation_cache_json, 'r') as f: + self.annotations = json.loads(f.read()) + else: + self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version=version) + json.dump(self.annotations, open(annotation_cache_json, "w")) + + if labels_filter is not None: + self.id_to_labels, self.annotations = self.__filter_data(labels_filter, fixed_labels) + + self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)]) + self.image_id_to_id = dict([(k, i) for i, k in enumerate(self.annotations)]) + self.class_index_array = 
get_class_index_arrays(len(cls_index), annotation_cache_json)
+        self.group_method = 'random'
+        self.subset = subset
+
+        super(OpenImagesGenerator, self).__init__(**kwargs)
+
+    def __filter_data(self, labels_filter, fixed_labels):
+        """
+        If you want to work with a subset of the labels, pass a list of the trainable labels.
+        :param labels_filter: e.g. labels_filter = ['Helmet', 'Hat', 'Analog television']
+        :param fixed_labels: if False, the 'Helmet' filter also matches sublabels such as
+               'bicycle helmet', 'welding helmet', 'ski helmet' etc.; if True, only the
+               exact labels in labels_filter are kept.
+        :return: (id_to_labels, filtered_annotations) restricted to the filtered label set
+        """
+
+        labels_to_id = dict([(l, i) for i, l in enumerate(labels_filter)])
+
+        sub_labels_to_id = {}
+        if fixed_labels:
+            # there is/are no other sublabel(s) other than the labels itself
+            sub_labels_to_id = labels_to_id
+        else:
+            for l in labels_filter:
+                label = str.lower(l)
+                for v in [v for v in self.id_to_labels.values() if label in str.lower(v)]:
+                    sub_labels_to_id[v] = labels_to_id[l]
+
+        filtered_annotations = {}
+        for k in self.annotations:
+            img_ann = self.annotations[k]
+
+            filtered_boxes = []
+            for ann in img_ann['boxes']:
+                cls_id = ann['cls_id']
+                label = self.id_to_labels[cls_id]
+                if label in sub_labels_to_id:
+                    ann['cls_id'] = sub_labels_to_id[label]
+                    filtered_boxes.append(ann)
+
+            if len(filtered_boxes) > 0:
+                filtered_annotations[k] = {'w': img_ann['w'], 'h': img_ann['h'], 'boxes': filtered_boxes}
+
+        id_to_labels = dict([(labels_to_id[k], k) for k in labels_to_id])
+        return id_to_labels, filtered_annotations
+
+    def size(self):
+        return len(self.annotations)
+
+    def num_classes(self):
+        return len(self.id_to_labels)
+
+    def name_to_label(self, name):
+        raise NotImplementedError()
+
+    def label_to_name(self, label):
+        return self.id_to_labels[label]
+
+    def image_aspect_ratio(self, image_index):
+        img_annotations = self.annotations[self.id_to_image_id[image_index]]
+        height, width = img_annotations['h'], img_annotations['w']
+        return float(width) / float(height)
+
+    def image_path(self, image_index):
+        type = os.path.basename(self.base_dir)
+        up = os.path.join(os.path.dirname(os.path.dirname(self.base_dir)), type)
+        id = self.id_to_image_id[image_index]
+        if type == 'train':
+            path = os.path.join(up, id[:3], id + '.jpg')
+        else:
+            path = os.path.join(up, id + '.jpg')
+        return path
+
+    def load_image(self, image_index):
+        # return read_image_bgr(self.image_path(image_index))
+        return read_image_bgr_fast(self.image_path(image_index))
+
+    def load_annotations(self, image_index):
+        image_annotations = self.annotations[self.id_to_image_id[image_index]]
+
+        labels = image_annotations['boxes']
+        height, width = image_annotations['h'], image_annotations['w']
+
+        boxes = np.zeros((len(labels), 5))
+        for idx, ann in enumerate(labels):
+            cls_id = ann['cls_id']
+            x1 = ann['x1'] * width
+            x2 = ann['x2'] * width
+            y1 = ann['y1'] * height
+            y2 = ann['y2'] * height
+
+            boxes[idx, 0] = x1
+            boxes[idx, 1] = y1
+            boxes[idx, 2] = x2
+            boxes[idx, 3] = y2
+            boxes[idx, 4] = cls_id
+
+        return boxes
+
+    def group_images(self):
+        classes = list(range(self.num_classes())) + ['empty']
+        self.groups = []
+        while 1:
+            if len(self.groups) > 100000:
+                break
+            self.groups.append([])
+            for i in range(self.batch_size):
+                while 1:
+                    random_class = random.choice(classes)
+                    # print(random_class, len(self.class_index_array[random_class]))
+                    if len(self.class_index_array[random_class]) > 0:
+                        random_image = random.choice(self.class_index_array[random_class])
+                        break
+                random_image_index = self.image_id_to_id[random_image]
+ 
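+                    # (class-balanced sampling: a random class, or 'empty', is drawn first,
+                    # then a random image containing it; this counteracts the heavy class
+                    # imbalance of Open Images. Groups are re-sampled in next() whenever
+                    # the group index wraps back to zero.)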
self.groups[-1].append(random_image_index) + + def preprocess_group_entry(self, image, annotations): + """ Preprocess image and its annotations. + """ + + if self.subset != 'validation': + # random color change + image = random_intensity_change1(image, -30, 30, True) + + # preprocess the image + image = self.preprocess_image(image) + + # randomly transform image and annotations + image, annotations = self.random_transform_group_entry(image, annotations) + + # resize image + image, image_scale = self.resize_image(image) + + # apply resizing to annotations too + annotations[:, :4] *= image_scale + + return image, annotations + + def __next__(self): + return self.next() + + def next(self): + # advance the group index + with self.lock: + if self.group_index == 0 and self.shuffle_groups: + self.group_images() + group = self.groups[self.group_index] + self.group_index = (self.group_index + 1) % len(self.groups) + + return self.compute_input_output(group) \ No newline at end of file diff --git a/retinanet_training_level_1/train_oid_level_1_resnet101.py b/retinanet_training_level_1/train_oid_level_1_resnet101.py new file mode 100644 index 0000000..d0d6a49 --- /dev/null +++ b/retinanet_training_level_1/train_oid_level_1_resnet101.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python + +""" +Copyright 2017-2018 Fizyr (https://fizyr.com) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import argparse +import os +import sys +import warnings + +import keras +import keras.preprocessing.image +import tensorflow as tf + +# Change these to absolute imports if you copy this script outside the keras_retinanet package. +from keras_retinanet import layers # noqa: F401 +from keras_retinanet import losses +from keras_retinanet import models +from keras_retinanet.callbacks import RedirectModel +from retinanet_training_level_1.callbacks.eval import Evaluate +from keras_retinanet.models.retinanet import retinanet_bbox +from keras_retinanet.preprocessing.csv_generator import CSVGenerator +from keras_retinanet.preprocessing.kitti import KittiGenerator +from keras_retinanet.preprocessing.pascal_voc import PascalVocGenerator +from keras_retinanet.utils.anchors import make_shapes_callback +from keras_retinanet.utils.keras_version import check_keras_version +from keras_retinanet.utils.model import freeze as freeze_model +from keras_retinanet.utils.transform import random_transform_generator + +from retinanet_training_level_1.open_images_generator import OpenImagesGenerator +from a00_utils_and_constants import DATASET_PATH + + +def makedirs(path): + # Intended behavior: try to create the directory, + # pass if the directory exists already, fails otherwise. + # Meant for Python 2.7/3.n compatibility. + try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + + +def get_session(): + """ Construct a modified tf session. + """ + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + return tf.Session(config=config) + + +def model_with_weights(model, weights, skip_mismatch): + """ Load weights for model. 
+ Args + model : The model to load weights for. + weights : The weights to load. + skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model. + """ + if weights is not None: + model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch) + return model + + +def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0, freeze_backbone=False): + """ Creates three models (model, training_model, prediction_model). + Args + backbone_retinanet : A function to call to create a retinanet model with a given backbone. + num_classes : The number of classes to train. + weights : The weights to load into the model. + multi_gpu : The number of GPUs to use for training. + freeze_backbone : If True, disables learning for the backbone. + Returns + model : The base model. This is also the model that is saved in snapshots. + training_model : The training model. If multi_gpu=0, this is identical to model. + prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS). + """ + modifier = freeze_model if freeze_backbone else None + + # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors. + # optionally wrap in a parallel model + if multi_gpu > 1: + from keras.utils import multi_gpu_model + with tf.device('/cpu:0'): + model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier), weights=weights, skip_mismatch=True) + training_model = multi_gpu_model(model, gpus=multi_gpu) + else: + model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier), weights=weights, skip_mismatch=True) + training_model = model + + # make prediction model + prediction_model = retinanet_bbox(model=model) + + # compile model + training_model.compile( + loss={ + 'regression' : losses.smooth_l1(), + 'classification': losses.focal() + }, + optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001) + ) + + return model, training_model, prediction_model + + +def create_callbacks(model, training_model, prediction_model, validation_generator, args): + """ Creates the callbacks to use during training. + Args + model: The base model. + training_model: The model that is used for training. + prediction_model: The model that should be used for validation. + validation_generator: The generator for creating validation data. + args: parseargs args object. + Returns: + A list of callbacks used for training. + """ + callbacks = [] + + tensorboard_callback = None + + if args.tensorboard_dir: + tensorboard_callback = keras.callbacks.TensorBoard( + log_dir = args.tensorboard_dir, + histogram_freq = 0, + batch_size = args.batch_size, + write_graph = True, + write_grads = False, + write_images = False, + embeddings_freq = 0, + embeddings_layer_names = None, + embeddings_metadata = None + ) + callbacks.append(tensorboard_callback) + + if args.evaluation and validation_generator: + evaluation = Evaluate(validation_generator, tensorboard=tensorboard_callback, save_map_path='./logs/mAP_stat_resnet101.txt') + evaluation = RedirectModel(evaluation, prediction_model) + callbacks.append(evaluation) + + # save the model + if args.snapshots: + # ensure directory created first; otherwise h5py will error after epoch. 
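+        # Snapshots are named '<backbone>_<dataset_type>_<epoch>.h5'; the resume logic
+        # in main() parses the starting epoch back out of this suffix, so keep the pattern.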
+ makedirs(args.snapshot_path) + checkpoint = keras.callbacks.ModelCheckpoint( + os.path.join( + args.snapshot_path, + '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format(backbone=args.backbone, dataset_type=args.dataset_type) + ), + verbose=1, + # save_best_only=True, + # monitor="mAP", + # mode='max' + ) + checkpoint = RedirectModel(checkpoint, model) + callbacks.append(checkpoint) + + callbacks.append(keras.callbacks.ReduceLROnPlateau( + monitor = 'loss', + factor = 0.9, + patience = 2, + verbose = 1, + mode = 'auto', + epsilon = 0.0001, + cooldown = 0, + min_lr = 1e-7 + )) + + return callbacks + + +def create_generators(args, preprocess_image): + """ Create generators for training and validation. + Args + args : parseargs object containing configuration for generators. + preprocess_image : Function that preprocesses an image for the network. + """ + common_args = { + 'batch_size' : args.batch_size, + 'image_min_side' : args.image_min_side, + 'image_max_side' : args.image_max_side, + 'preprocess_image' : preprocess_image, + } + + # create random transform generator for augmenting training data + if args.random_transform: + transform_generator = random_transform_generator( + min_rotation=-0.1, + max_rotation=0.1, + min_translation=(-0.1, -0.1), + max_translation=(0.1, 0.1), + min_shear=-0.1, + max_shear=0.1, + min_scaling=(0.8, 0.8), + max_scaling=(1.2, 1.2), + flip_x_chance=0.5, + flip_y_chance=0.5, + ) + else: + transform_generator = random_transform_generator(flip_x_chance=0.5) + + + train_generator = OpenImagesGenerator( + args.main_dir, + subset='train', + version=args.version, + labels_filter=args.labels_filter, + annotation_cache_dir=args.annotation_cache_dir, + fixed_labels=args.fixed_labels, + transform_generator=transform_generator, + **common_args + ) + + validation_generator = OpenImagesGenerator( + args.main_dir, + subset='validation', + version=args.version, + labels_filter=args.labels_filter, + annotation_cache_dir=args.annotation_cache_dir, + fixed_labels=args.fixed_labels, + **common_args + ) + + return train_generator, validation_generator + + +def check_args(parsed_args): + """ Function to check for inherent contradictions within parsed arguments. + For example, batch_size < num_gpus + Intended to raise errors prior to backend initialisation. + Args + parsed_args: parser.parse_args() + Returns + parsed_args + """ + + if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu: + raise ValueError( + "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(parsed_args.batch_size, + parsed_args.multi_gpu)) + + if parsed_args.multi_gpu > 1 and parsed_args.snapshot: + raise ValueError( + "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(parsed_args.multi_gpu, + parsed_args.snapshot)) + + if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force: + raise ValueError("Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.") + + if 'resnet' not in parsed_args.backbone: + warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format(parsed_args.backbone)) + + return parsed_args + + +def parse_args(args): + """ Parse the arguments. 
+ """ + parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') + subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type') + subparsers.required = True + + coco_parser = subparsers.add_parser('coco') + coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).') + + pascal_parser = subparsers.add_parser('pascal') + pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).') + + kitti_parser = subparsers.add_parser('kitti') + kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).') + + def csv_list(string): + return string.split(',') + + oid_parser = subparsers.add_parser('oid') + oid_parser.add_argument('main_dir', help='Path to dataset directory.') + oid_parser.add_argument('--version', help='The current dataset version is v4.', default='v4') + oid_parser.add_argument('--labels-filter', help='A list of labels to filter.', type=csv_list, default=None) + oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.') + oid_parser.add_argument('--fixed-labels', help='Use the exact specified labels.', default=False) + + csv_parser = subparsers.add_parser('csv') + csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for training.') + csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.') + csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).') + + group = parser.add_mutually_exclusive_group() + group.add_argument('--snapshot', help='Resume training from a snapshot.') + group.add_argument('--imagenet-weights', help='Initialize the model with pretrained imagenet weights. 
This is the default behaviour.', action='store_const', const=True, default=True) + group.add_argument('--weights', help='Initialize the model with weights from a file.') + group.add_argument('--no-weights', help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False) + + parser.add_argument('--backbone', help='Backbone model used by retinanet.', default='resnet50', type=str) + parser.add_argument('--batch-size', help='Size of the batches.', default=1, type=int) + parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') + parser.add_argument('--multi-gpu', help='Number of GPUs to use for parallel processing.', type=int, default=0) + parser.add_argument('--multi-gpu-force', help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true') + parser.add_argument('--epochs', help='Number of epochs to train.', type=int, default=500) + parser.add_argument('--steps', help='Number of steps per epoch.', type=int, default=10000) + parser.add_argument('--snapshot-path', help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots') + parser.add_argument('--tensorboard-dir', help='Log directory for Tensorboard output', default='./logs') + parser.add_argument('--no-snapshots', help='Disable saving snapshots.', dest='snapshots', action='store_false') + parser.add_argument('--no-evaluation', help='Disable per epoch evaluation.', dest='evaluation', action='store_false') + parser.add_argument('--freeze-backbone', help='Freeze training of backbone layers.', action='store_true') + parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true') + parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) + parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) + + return check_args(parser.parse_args(args)) + + +def main(args=None): + from keras import backend as K + + # parse arguments + if args is None: + args = sys.argv[1:] + args = parse_args(args) + print('Arguments: {}'.format(args)) + + # create object that stores backbone information + backbone = models.backbone(args.backbone) + + # make sure keras is the minimum required version + check_keras_version() + + # optionally choose specific GPU + if args.gpu: + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + keras.backend.tensorflow_backend.set_session(get_session()) + + # create the generators + train_generator, validation_generator = create_generators(args, backbone.preprocess_image) + + # create the model + if args.snapshot is not None: + print('Loading model from {}, this may take a second...'.format(args.snapshot)) + model = models.load_model(args.snapshot, backbone_name=args.backbone) + training_model = model + prediction_model = retinanet_bbox(model=model) + else: + weights = args.weights + # default to imagenet if nothing else is specified + if weights is None and args.imagenet_weights: + weights = backbone.download_imagenet() + + print('Creating model, this may take a second...') + model, training_model, prediction_model = create_models( + backbone_retinanet=backbone.retinanet, + num_classes=train_generator.num_classes(), + weights=weights, + multi_gpu=args.multi_gpu, + freeze_backbone=args.freeze_backbone + ) + + # print model summary + print(model.summary()) + + print('Learning rate: 
{}'.format(K.get_value(model.optimizer.lr))) + # K.set_value(model.optimizer.lr, 1e-5) + print('Learning rate: {}'.format(K.get_value(model.optimizer.lr))) + + # this lets the generator compute backbone layer shapes using the actual backbone model + if 'vgg' in args.backbone or 'densenet' in args.backbone: + train_generator.compute_shapes = make_shapes_callback(model) + if validation_generator: + validation_generator.compute_shapes = train_generator.compute_shapes + + # create the callbacks + callbacks = create_callbacks( + model, + training_model, + prediction_model, + validation_generator, + args, + ) + + init_epoch = 0 + if args.snapshot: + init_epoch = int(args.snapshot.split("_")[-1].split(".")[0]) + print('Init epoch: {}'.format(init_epoch)) + + # start training + training_model.fit_generator( + generator=train_generator, + steps_per_epoch=args.steps, + epochs=args.epochs, + verbose=2, + callbacks=callbacks, + initial_epoch=init_epoch, + ) + + +if __name__ == '__main__': + + # You can start training from intermediate point just uncomment '--snapshot' param + + params = [ + # '--snapshot', './snapshots/resnet101_oid_189.h5', + '--imagenet-weights', + '--gpu', '0', + '--steps', '10000', + # '--multi-gpu', '2', + # '--multi-gpu-force', + # '--backbone', 'mobilenet224_1.0', + '--backbone', 'resnet101', + '--batch-size', '1', + '--image-min-side', '768', + '--image-max-side', '1024', + 'oid', + DATASET_PATH, + ] + main(params) diff --git a/retinanet_training_level_1/train_oid_level_1_resnet152.py b/retinanet_training_level_1/train_oid_level_1_resnet152.py new file mode 100644 index 0000000..612aef8 --- /dev/null +++ b/retinanet_training_level_1/train_oid_level_1_resnet152.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python + +""" +Copyright 2017-2018 Fizyr (https://fizyr.com) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import argparse +import os +import sys +import warnings + +import keras +import keras.preprocessing.image +import tensorflow as tf + +# Change these to absolute imports if you copy this script outside the keras_retinanet package. +from keras_retinanet import layers # noqa: F401 +from keras_retinanet import losses +from keras_retinanet import models +from keras_retinanet.callbacks import RedirectModel +from keras_retinanet.models.retinanet import retinanet_bbox +from keras_retinanet.utils.anchors import make_shapes_callback +from keras_retinanet.utils.keras_version import check_keras_version +from keras_retinanet.utils.model import freeze as freeze_model +from keras_retinanet.utils.transform import random_transform_generator + +from retinanet_training_level_1.callbacks.eval import Evaluate +from retinanet_training_level_1.open_images_generator import OpenImagesGenerator +from a00_utils_and_constants import DATASET_PATH + + +def makedirs(path): + # Intended behavior: try to create the directory, + # pass if the directory exists already, fails otherwise. + # Meant for Python 2.7/3.n compatibility. 
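+    # On Python 3.2+ this is effectively os.makedirs(path, exist_ok=True).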
+ try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + + +def get_session(): + """ Construct a modified tf session. + """ + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + return tf.Session(config=config) + + +def model_with_weights(model, weights, skip_mismatch): + """ Load weights for model. + Args + model : The model to load weights for. + weights : The weights to load. + skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model. + """ + if weights is not None: + model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch) + return model + + +def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0, freeze_backbone=False): + """ Creates three models (model, training_model, prediction_model). + Args + backbone_retinanet : A function to call to create a retinanet model with a given backbone. + num_classes : The number of classes to train. + weights : The weights to load into the model. + multi_gpu : The number of GPUs to use for training. + freeze_backbone : If True, disables learning for the backbone. + Returns + model : The base model. This is also the model that is saved in snapshots. + training_model : The training model. If multi_gpu=0, this is identical to model. + prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS). + """ + modifier = freeze_model if freeze_backbone else None + + # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors. + # optionally wrap in a parallel model + if multi_gpu > 1: + from keras.utils import multi_gpu_model + with tf.device('/cpu:0'): + model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier), weights=weights, skip_mismatch=True) + training_model = multi_gpu_model(model, gpus=multi_gpu) + else: + model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier), weights=weights, skip_mismatch=True) + training_model = model + + # make prediction model + prediction_model = retinanet_bbox(model=model) + + # compile model + training_model.compile( + loss={ + 'regression' : losses.smooth_l1(), + 'classification': losses.focal() + }, + optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001) + ) + + return model, training_model, prediction_model + + +def create_callbacks(model, training_model, prediction_model, validation_generator, args): + """ Creates the callbacks to use during training. + Args + model: The base model. + training_model: The model that is used for training. + prediction_model: The model that should be used for validation. + validation_generator: The generator for creating validation data. + args: parseargs args object. + Returns: + A list of callbacks used for training. 
+ """ + callbacks = [] + + tensorboard_callback = None + + if args.tensorboard_dir: + tensorboard_callback = keras.callbacks.TensorBoard( + log_dir = args.tensorboard_dir, + histogram_freq = 0, + batch_size = args.batch_size, + write_graph = True, + write_grads = False, + write_images = False, + embeddings_freq = 0, + embeddings_layer_names = None, + embeddings_metadata = None + ) + callbacks.append(tensorboard_callback) + + if args.evaluation and validation_generator: + evaluation = Evaluate(validation_generator, tensorboard=tensorboard_callback, save_map_path='./logs/mAP_stat_resnet152.txt') + evaluation = RedirectModel(evaluation, prediction_model) + callbacks.append(evaluation) + + # save the model + if args.snapshots: + # ensure directory created first; otherwise h5py will error after epoch. + makedirs(args.snapshot_path) + checkpoint = keras.callbacks.ModelCheckpoint( + os.path.join( + args.snapshot_path, + '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format(backbone=args.backbone, dataset_type=args.dataset_type) + ), + verbose=1, + # save_best_only=True, + # monitor="mAP", + # mode='max' + ) + checkpoint = RedirectModel(checkpoint, model) + callbacks.append(checkpoint) + + callbacks.append(keras.callbacks.ReduceLROnPlateau( + monitor = 'loss', + factor = 0.9, + patience = 2, + verbose = 1, + mode = 'auto', + epsilon = 0.0001, + cooldown = 0, + min_lr = 1e-7 + )) + + return callbacks + + +def create_generators(args, preprocess_image): + """ Create generators for training and validation. + Args + args : parseargs object containing configuration for generators. + preprocess_image : Function that preprocesses an image for the network. + """ + common_args = { + 'batch_size' : args.batch_size, + 'image_min_side' : args.image_min_side, + 'image_max_side' : args.image_max_side, + 'preprocess_image' : preprocess_image, + } + + # create random transform generator for augmenting training data + if args.random_transform: + transform_generator = random_transform_generator( + min_rotation=-0.1, + max_rotation=0.1, + min_translation=(-0.1, -0.1), + max_translation=(0.1, 0.1), + min_shear=-0.1, + max_shear=0.1, + min_scaling=(0.8, 0.8), + max_scaling=(1.2, 1.2), + flip_x_chance=0.5, + flip_y_chance=0.5, + ) + else: + transform_generator = random_transform_generator(flip_x_chance=0.5) + + + train_generator = OpenImagesGenerator( + args.main_dir, + subset='train', + version=args.version, + labels_filter=args.labels_filter, + annotation_cache_dir=args.annotation_cache_dir, + fixed_labels=args.fixed_labels, + transform_generator=transform_generator, + **common_args + ) + + validation_generator = OpenImagesGenerator( + args.main_dir, + subset='validation', + version=args.version, + labels_filter=args.labels_filter, + annotation_cache_dir=args.annotation_cache_dir, + fixed_labels=args.fixed_labels, + **common_args + ) + + return train_generator, validation_generator + + +def check_args(parsed_args): + """ Function to check for inherent contradictions within parsed arguments. + For example, batch_size < num_gpus + Intended to raise errors prior to backend initialisation. 
+ Args + parsed_args: parser.parse_args() + Returns + parsed_args + """ + + if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu: + raise ValueError( + "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(parsed_args.batch_size, + parsed_args.multi_gpu)) + + if parsed_args.multi_gpu > 1 and parsed_args.snapshot: + raise ValueError( + "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(parsed_args.multi_gpu, + parsed_args.snapshot)) + + if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force: + raise ValueError("Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.") + + if 'resnet' not in parsed_args.backbone: + warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format(parsed_args.backbone)) + + return parsed_args + + +def parse_args(args): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') + subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type') + subparsers.required = True + + coco_parser = subparsers.add_parser('coco') + coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).') + + pascal_parser = subparsers.add_parser('pascal') + pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).') + + kitti_parser = subparsers.add_parser('kitti') + kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).') + + def csv_list(string): + return string.split(',') + + oid_parser = subparsers.add_parser('oid') + oid_parser.add_argument('main_dir', help='Path to dataset directory.') + oid_parser.add_argument('--version', help='The current dataset version is v4.', default='v4') + oid_parser.add_argument('--labels-filter', help='A list of labels to filter.', type=csv_list, default=None) + oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.') + oid_parser.add_argument('--fixed-labels', help='Use the exact specified labels.', default=False) + + csv_parser = subparsers.add_parser('csv') + csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for training.') + csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.') + csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).') + + group = parser.add_mutually_exclusive_group() + group.add_argument('--snapshot', help='Resume training from a snapshot.') + group.add_argument('--imagenet-weights', help='Initialize the model with pretrained imagenet weights. 
This is the default behaviour.', action='store_const', const=True, default=True) + group.add_argument('--weights', help='Initialize the model with weights from a file.') + group.add_argument('--no-weights', help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False) + + parser.add_argument('--backbone', help='Backbone model used by retinanet.', default='resnet50', type=str) + parser.add_argument('--batch-size', help='Size of the batches.', default=1, type=int) + parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') + parser.add_argument('--multi-gpu', help='Number of GPUs to use for parallel processing.', type=int, default=0) + parser.add_argument('--multi-gpu-force', help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true') + parser.add_argument('--epochs', help='Number of epochs to train.', type=int, default=500) + parser.add_argument('--steps', help='Number of steps per epoch.', type=int, default=10000) + parser.add_argument('--snapshot-path', help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots') + parser.add_argument('--tensorboard-dir', help='Log directory for Tensorboard output', default='./logs') + parser.add_argument('--no-snapshots', help='Disable saving snapshots.', dest='snapshots', action='store_false') + parser.add_argument('--no-evaluation', help='Disable per epoch evaluation.', dest='evaluation', action='store_false') + parser.add_argument('--freeze-backbone', help='Freeze training of backbone layers.', action='store_true') + parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true') + parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) + parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) + + return check_args(parser.parse_args(args)) + + +def main(args=None): + from keras import backend as K + + # parse arguments + if args is None: + args = sys.argv[1:] + args = parse_args(args) + print('Arguments: {}'.format(args)) + + # create object that stores backbone information + backbone = models.backbone(args.backbone) + + # make sure keras is the minimum required version + check_keras_version() + + # optionally choose specific GPU + if args.gpu: + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + keras.backend.tensorflow_backend.set_session(get_session()) + + # create the generators + train_generator, validation_generator = create_generators(args, backbone.preprocess_image) + + # create the model + if args.snapshot is not None: + print('Loading model from {}, this may take a second...'.format(args.snapshot)) + model = models.load_model(args.snapshot, backbone_name=args.backbone) + training_model = model + prediction_model = retinanet_bbox(model=model) + else: + weights = args.weights + # default to imagenet if nothing else is specified + if weights is None and args.imagenet_weights: + weights = backbone.download_imagenet() + + print('Creating model, this may take a second...') + model, training_model, prediction_model = create_models( + backbone_retinanet=backbone.retinanet, + num_classes=train_generator.num_classes(), + weights=weights, + multi_gpu=args.multi_gpu, + freeze_backbone=args.freeze_backbone + ) + + # print model summary + print(model.summary()) + + print('Learning rate: 
{}'.format(K.get_value(model.optimizer.lr))) + # K.set_value(model.optimizer.lr, 1e-5) + print('Learning rate: {}'.format(K.get_value(model.optimizer.lr))) + + # this lets the generator compute backbone layer shapes using the actual backbone model + if 'vgg' in args.backbone or 'densenet' in args.backbone: + train_generator.compute_shapes = make_shapes_callback(model) + if validation_generator: + validation_generator.compute_shapes = train_generator.compute_shapes + + # create the callbacks + callbacks = create_callbacks( + model, + training_model, + prediction_model, + validation_generator, + args, + ) + + init_epoch = 0 + if args.snapshot: + init_epoch = int(args.snapshot.split("_")[-1].split(".")[0]) + print('Init epoch: {}'.format(init_epoch)) + + # start training + training_model.fit_generator( + generator=train_generator, + steps_per_epoch=args.steps, + epochs=args.epochs, + verbose=2, + callbacks=callbacks, + initial_epoch=init_epoch, + ) + + +if __name__ == '__main__': + + # You can start training from intermediate point just uncomment '--snapshot' param + + params = [ + # '--snapshot', './snapshots/resnet152_oid_239.h5', + '--imagenet-weights', + '--gpu', '0', + '--steps', '10000', + # '--multi-gpu', '2', + # '--multi-gpu-force', + '--backbone', 'resnet152', + '--batch-size', '1', + '--image-min-side', '600', + '--image-max-side', '800', + 'oid', + DATASET_PATH, + ] + main(params)
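
Two closing notes on the training scripts. First, '--snapshot' and '--imagenet-weights' sit in a mutually exclusive argparse group, so when uncommenting the '--snapshot' line in params you must also remove '--imagenet-weights', otherwise argparse rejects the combination. Second, the Evaluate callback appends lines of the form "Ep <n>: mAP: <value>" to its save_map_path log ('./logs/mAP_stat_resnet101.txt' / './logs/mAP_stat_resnet152.txt'), which makes it easy to locate the best snapshot. A minimal sketch (the helper name is ours; it assumes the default log and snapshot paths used above, and the 1-based epoch numbering shared by Evaluate and ModelCheckpoint):

import re

def best_epoch_from_map_log(log_path='./logs/mAP_stat_resnet152.txt'):
    # Scan for "Ep <n>: mAP: <value>" lines written by Evaluate.on_epoch_end;
    # per-class AP lines in the same file do not match the pattern and are skipped.
    best_epoch, best_map = None, -1.0
    with open(log_path) as f:
        for line in f:
            m = re.match(r'Ep (\d+): mAP: ([0-9.]+)$', line.strip())
            if m and float(m.group(2)) > best_map:
                best_epoch, best_map = int(m.group(1)), float(m.group(2))
    return best_epoch, best_map

if __name__ == '__main__':
    epoch, map_value = best_epoch_from_map_log()
    if epoch is not None:
        print('Best snapshot: ./snapshots/resnet152_oid_{:02d}.h5 (mAP {:.4f})'.format(epoch, map_value))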