Need help in VGG16 Faster R-CNN implementation #11688

quitsv · 2024-05-07T15:13:38Z

I need help in implementing VGG16 for Faster R-CNN for my graduation thesis. This is my first time using mmdetection so I don't really have a clear idea how to create, train, and customize a custom model.

My environment is in google colab with CUDA 12.2.
l ran this code with no issue:

!pip3 install openmim
!mim install mmengine
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
!pip install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.1/index.html

I'm using a custom dataset from roboflow with COCO format with this format:

data
|-- plat
|--|-- test
|--|--|-- _anotations.coco.json
|--|--|-- [images_x].jpg
|--|--|-- [images_x].jpg
|--|--|-- [..................]
|--|-- train
|--|--|-- _anotations.coco.json
|--|--|-- [images_x].jpg
|--|--|-- [images_x].jpg
|--|--|-- [..................]
|--|-- valid
|--|--|-- _anotations.coco.json
|--|--|-- [images_x].jpg
|--|--|-- [images_x].jpg
|--|--|-- [..................]
|--|-- readme.txt

This is my config in mmdetection > configs > faster_rcnn > faster_rcnn_v16_platBS16E20.py

base = [
    "../base/models/faster-rcnn_v16.py",
    "../base/schedules/schedule_1x.py",
    "../base/default_runtime.py",
]
# Dataset settings
dataset_type = "PlatDataset"
data_root = "data/plat/"

train_ann_file = "train/_annotations.coco.json"
train_data_prefix = "train/"
val_ann_file = "valid/_annotations.coco.json"
val_data_prefix = "valid/"
test_ann_file = "test/_annotations.coco.json"
test_data_prefix = "test/"

class_name = "plat"
num_classes = 1
img_scale = (640, 640)  # width, height

# Train settings
train_batch_size_per_gpu = 16
train_num_workers = 10
train_persistent_workers = True

# Valid settings
val_batch_size_per_gpu = 16
val_num_workers = 10
val_persistent_workers = True

# Train Val settings
base_lr = 0.01  # Base learning rate for optim_wrapper
max_epochs = 20  # Maximum training epochs
learning_rate = 0.01
momentum = 0.937
weight_decay = 0.0005
lr_start_factor = 1.0e-5

backend_args = None

model_test_cfg = dict(
    # The config of multi-label for multi-class prediction.
    multi_label=True,
    # The number of boxes before NMS
    nms_pre=30000,
    score_thr=0.001,  # Threshold to filter out boxes.
    nms=dict(type="nms", iou_threshold=0.95),  # NMS type and threshold
    max_per_img=300,  # Max number of detections of each image
)

train_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type="LoadAnnotations", with_bbox=True),
    dict(type="PackDetInputs"),
]
test_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type="PackDetInputs",
        meta_keys=(
            "img_id",
            "img_path",
            "ori_shape",
            "img_shape",
            "scale_factor",
        ),
    ),
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    persistent_workers=train_persistent_workers,
    sampler=dict(type="DefaultSampler", shuffle=True),
    batch_sampler=dict(type="AspectRatioBatchSampler"),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=train_ann_file,
        data_prefix=dict(img=train_data_prefix),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args,
    ),
)
val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    persistent_workers=val_persistent_workers,
    drop_last=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=val_ann_file,
        data_prefix=dict(img=val_data_prefix),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args,
    ),
)
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=test_ann_file,
        data_prefix=dict(img=test_data_prefix),
        test_mode=True,
        pipeline=test_pipeline,
    ),
)

val_evaluator = dict(
    type="CocoMetric",
    ann_file=data_root + val_ann_file,
    metric="bbox",
    format_only=False,
    backend_args=backend_args,
)
test_evaluator = dict(
    type="CocoMetric",
    metric="bbox",
    format_only=True,
    ann_file=data_root + test_ann_file,
    outfile_prefix="./work_dirs/plat_detection/test",
)

# Config of batch shapes. Only on val.
batch_shapes_cfg = dict(
    type="BatchShapePolicy",
    batch_size=val_batch_size_per_gpu,
    img_size=img_scale[0],
    size_divisor=32,
    extra_pad_ratio=0.5,
)

optimizer = dict(type="SGD", lr=base_lr, momentum=momentum, weight_decay=weight_decay)
optimizer_config = dict(_delete_=True)

# learning rate
param_scheduler = [
    dict(
        type="LinearLR",
        start_factor=lr_start_factor,
        by_epoch=True,
        begin=0,
        end=max_epochs,
        convert_to_iter_based=True,
    )
]

norm_cfg = dict(type="BN")  # Normalization config

# -----train val related-----
lr_start_factor = 1.0e-5
dsl_topk = 13  # Number of bbox selected in each level
loss_cls_weight = 1.0
loss_bbox_weight = 2.0
qfl_beta = 2.0  # beta of QualityFocalLoss
weight_decay = 0.05

# Save model checkpoint and validation intervals
save_checkpoint_intervals = 10

# The maximum checkpoints to keep.
max_keep_ckpts = 3
# single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)

# hooks
default_hooks = dict(
    checkpoint=dict(
        type="CheckpointHook",
        interval=save_checkpoint_intervals,
        max_keep_ckpts=max_keep_ckpts,  # only keep latest 3 checkpoints
    )
)

custom_hooks = [
    dict(
        type="EMAHook",
        ema_type="ExpMomentumEMA",
        momentum=0.0002,
        update_buffers=True,
        strict_load=False,
        priority=49,
    )
]

train_cfg = dict(
    type="EpochBasedTrainLoop",
    max_epochs=max_epochs,
    val_interval=save_checkpoint_intervals,
)

val_cfg = dict(type="ValLoop")
test_cfg = dict(type="TestLoop")

this is the model in mmdetection > configs > _base_ > models > faster-rcnn_v16.py

model = dict(
    type="FasterRCNN",
    backbone=dict(
        type="VGG",
        depth=16,
        num_stages=4,
        dilations=(1, 1, 1, 1),
        out_indices=(3,),
        frozen_stages=-1,
        bn_eval=True,
        with_bn=True,
        pretrained="torchvision://vgg16_bn",
    ),
    rpn_head=dict(
        type="RPNHead",
        in_channels=512,  # channel changed
        feat_channels=512,  # channel changed
        anchor_generator=dict(
            type="AnchorGenerator",
            scales=[2, 4, 8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[16],
        ),
        bbox_coder=dict(
            type="DeltaXYWHBBoxCoder",
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="L1Loss", loss_weight=1.0),
    ),
    roi_head=dict(
        type="StandardRoIHead",
        bbox_roi_extractor=dict(
            type="SingleRoIExtractor",
            roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
            out_channels=512,  # channel changed
            featmap_strides=[16],
        ),
        bbox_head=dict(
            type="Shared2FCBBoxHead",
            in_channels=512,  # channel changed
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=2,
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        ),
        rpn_proposal=dict(
            nms_pre=12000,
            max_per_img=2000,
            nms=dict(type="nms", iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            assigner=dict(
                type="MaxIoUAssigner",
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type="RandomSampler",
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        ),
    ),
    test_cfg=dict(
        rpn=dict(
            nms=dict(type="nms", iou_threshold=0.7),
            nms_pre=6000,
            max_per_img=1000,
            min_bbox_size=0,
        ),
        rcnn=dict(
            score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100
        ),
    ),
)

and this is my VGG model in mmdetection > mmdet > models > backbones > vgg.py

from mmdet.registry import MODELS
from mmcv.cnn import VGG


@MODELS.register_module()
class VGG(VGG):
    def __init__(self, pretrained=None, *args, **kwargs):
        self.pretrained = pretrained
        super().__init__(*args, **kwargs)

    def init_weights(self, pretrained=None):
        super().init_weights(pretrained)

    def forward(self, x):
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i in range(len(self.stage_blocks)):
            for j in range(*self.range_sub_modules[i]):
                vgg_layer = vgg_layers[j]
                x = vgg_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if self.num_classes > 0:
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            outs.append(x)

        return tuple(outs)

I've also added the VGG class in __init__.py file.

I'm getting this error when i run this command on my colab

!python tools/train.py configs/faster_rcnn/faster_rcnn_v16_platBS16E20.py

this is the error:

Traceback (most recent call last):
  File "/content/ta-mmdetection/tools/train.py", line 121, in <module>
    main()
  File "/content/ta-mmdetection/tools/train.py", line 110, in main
    runner = Runner.from_cfg(cfg)
  File "/usr/local/lib/python3.10/dist-packages/mmengine/runner/runner.py", line 463, in from_cfg
    model=cfg['model'],
  File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 1502, in __getitem__
    return self._cfg_dict.__getitem__(name)
  File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 138, in __getitem__
    return self.build_lazy(super().__getitem__(key))
  File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 105, in __missing__
    raise KeyError(name)
KeyError: 'model'

is there anything wrong with my code? I'm willing to change anything as long as it fix the code.
Hope anyone can help me ASAP since I'm short in time.
Thank you for your help!

The text was updated successfully, but these errors were encountered:

mm-assistant bot assigned hhaAndroid May 7, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Need help in VGG16 Faster R-CNN implementation #11688

Need help in VGG16 Faster R-CNN implementation #11688

quitsv commented May 7, 2024

Need help in VGG16 Faster R-CNN implementation #11688

Need help in VGG16 Faster R-CNN implementation #11688

Comments

quitsv commented May 7, 2024