Quantcast
Viewing all articles
Browse latest Browse all 14069

Bounding box regression loss increases where RPN loss decreases Oriented RCNN

I am playing around with an Oriented RCNN on a custom satellite DOTA dataset in MMRotate. When I train the Oriented RCNN, the bounding box regression loss starts very low (loss ≈ 0.009) and increases almost consistently while the RPN regression loss decreases! Some facts about the dataset:

  • around 60,000 images.
  • each image can contain between 0 and 4 objects.
  • the images are large (1024x1024) and the objects tend to be very small.

I have looked at my data and angle definitions and am convinced that these are set correctly.

Has anyone experienced anything similar?

Here is the config for mmrotate oriented rcnn:

# MMRotate Oriented R-CNN config, restored to one statement per line.
# The pasted version had lost its newlines, fusing statements (and the two
# section comments near the end) together. Every key and value below is
# exactly as posted; only whitespace and line breaks were reconstructed.

# NOTE(review): the base dataset file is ssdd.py, while the post describes a
# custom DOTA dataset -- confirm this is the intended dataset config.
_base_ = [
    '../_base_/datasets/ssdd.py',
    '../_base_/schedules/schedule_3x.py',
    '../_base_/default_runtime.py',
]

# Shared angle convention; passed to the RPN head, both bbox coders, the flip
# transform and all three dataset splits below.
angle_version = 'le90'

model = dict(
    type='OrientedRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='OrientedRPNHead',
        in_channels=256,
        feat_channels=256,
        version=angle_version,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='MidpointOffsetCoder',
            angle_range=angle_version,
            target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(
            type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
    roi_head=dict(
        type='OrientedStandardRoIHead',
        bbox_roi_extractor=dict(
            type='RotatedSingleRoIExtractor',
            roi_layer=dict(
                type='RoIAlignRotated',
                out_size=7,
                sample_num=2,
                clockwise=True),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='RotatedShared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=1,
            bbox_coder=dict(
                type='DeltaXYWHAOBBoxCoder',
                angle_range=angle_version,
                norm_factor=None,
                edge_swap=True,
                proj_xy=True,
                target_means=(.0, .0, .0, .0, .0),
                target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
            reg_class_agnostic=True,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.8),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                iou_calculator=dict(type='RBboxOverlaps2D'),
                ignore_iof_thr=-1),
            sampler=dict(
                type='RRandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.8),
            min_bbox_size=0),
        rcnn=dict(
            nms_pre=2000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(iou_thr=0.1),
            max_per_img=2000)))

# The same mean/std is used for all three channels.
img_norm_cfg = dict(
    mean=[21.55, 21.55, 21.55], std=[24.42, 24.42, 24.42], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # NOTE(review): the post says images are 1024x1024 with very small
    # objects; img_scale here is (608, 608) -- confirm this is intended.
    dict(type='RResize', img_scale=(608, 608)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version=angle_version),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]

data = dict(
    train=dict(pipeline=train_pipeline, version=angle_version),
    val=dict(version=angle_version),
    test=dict(version=angle_version))

optimizer = dict(lr=0.005)

# evaluation
evaluation = dict(interval=72, metric='mAP')

# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[65, 71])
runner = dict(type='EpochBasedRunner', max_epochs=72)
checkpoint_config = dict(interval=12)

Viewing all articles
Browse latest Browse all 14069

Trending Articles