# Extracted from Doxygen page d7/d5b/posec3d__cat__mlp__fl__3rd_8py_source.html
# (presumably the original file is posec3d_cat_mlp_fl_3rd.py).

# Model configuration: two ResNet3d backbones (one per input heatmap key),
# an I3DFusion module over both backbone outputs, and MLP heads attached
# either to the fused feature or directly to a single backbone output.
model = {
    'type': 'ActionRecognitionRunner',
    # Both backbones share every setting except the base channel width and
    # the input key they read.
    'backbone': {
        branch: {
            'type': 'ResNet3d',
            'in_channels': 17,
            'base_channels': width,
            'stage_blocks': (4, 6, 3),
            'out_indices': (2, ),
            'spatial_strides': (2, 2, 2),
            'temporal_strides': (1, 1, 2),
            'dilations': (1, 1, 1),
            'conv1_kernel': (1, 7, 7),
            'conv1_stride_s': 1,
            'conv1_stride_t': 1,
            'pool1_stride_s': 1,
            'pool1_stride_t': 1,
            'inflate': (0, 1, 1),
            'inflate_style': '3x1x1',
            'input_key': heatmap_key,
        }
        for branch, width, heatmap_key in (
            ('action_feat', 64, 'pose_heatmap_for_action'),
            ('pose_feat', 32, 'pose_heatmap_for_pose'),
        )
    },
    # 1024 + 512 equals the in_channels used by the heads that read
    # 'action_feat' (1024) and 'pose_feat' (512) below; presumably the two
    # feature maps are concatenated channel-wise -- TODO confirm in I3DFusion.
    'fusion': {
        'type': 'I3DFusion',
        'in_channels': 1024 + 512,
        'out_channels': 0,
        'dropout_ratio': 0.5,
        'input_key': ['action_feat', 'pose_feat'],
    },
    'head': {
        # Main heads: one per task, all reading the fused feature.
        **{
            task: {
                'type': 'MLPHead',
                'in_channels': 1024 + 512,
                'num_classes': class_count,
                'layer_channels': [2048],
                'dropout_ratio': 0.5,
                'input_key': 'fusion',
            }
            for task, class_count in (
                ('action_upper', 6),
                ('action_lower', 12),
                ('pose', 8),
                ('hand', 4),
                ('foot', 2),
            )
        },
        # Auxiliary heads: same tasks, but each reads a single backbone
        # output ('3d' input type) instead of the fused feature.
        **{
            aux_name: {
                'type': 'MLPHead',
                'in_channels': feat_channels,
                'num_classes': class_count,
                'layer_channels': [1024],
                'dropout_ratio': 0.5,
                'input_key': feat_key,
                'input_type': '3d',
            }
            for aux_name, feat_channels, class_count, feat_key in (
                ('aux_action_upper', 1024, 6, 'action_feat'),
                ('aux_action_lower', 1024, 12, 'action_feat'),
                ('aux_pose', 512, 8, 'pose_feat'),
                ('aux_hand', 512, 4, 'pose_feat'),
                ('aux_foot', 512, 2, 'pose_feat'),
            )
        },
    },
    # Key of the predicted label -> key of the head (i.e. the score key).
    'predict_keys': {
        'pred_action_upper': 'action_upper',
        'pred_action_lower': 'action_lower',
        'pred_pose': 'pose',
        'pred_hand': 'hand',
        'pred_foot': 'foot',
    },
}


# Score keys: the ten head names declared in model['head'] -- the five main
# heads followed by their five 'aux_' counterparts.
score_keys = [
    'action_upper', 'action_lower', 'pose', 'hand', 'foot',
    'aux_action_upper', 'aux_action_lower', 'aux_pose', 'aux_hand', 'aux_foot',
]

# Predicted-label keys; these are the keys of model['predict_keys'].
pred_keys = [
    'pred_action_upper',
    'pred_action_lower',
    'pred_pose',
    'pred_hand',
    'pred_foot',
]

# Ground-truth keys, repeated twice so the list has one entry per score key
# (the aux heads reuse the labels of the main heads).
gt_keys = ['gt_action_upper', 'gt_action_lower', 'gt_pose', 'gt_hand', 'gt_foot'] * 2

# Task name per score key, with the same main/aux repetition as gt_keys.
target_tasks = ['action_upper', 'action_lower', 'pose', 'hand', 'foot'] * 2

# Task names without the aux repetition.
train_tasks = [
    'action_upper',
    'action_lower',
    'pose',
    'hand',
    'foot',
]


# Loss configuration, keyed by loss name.
# NOTE(review): 'MutiTaskSigmoidFocalLoss' is spelled exactly as in the
# original; presumably it has to match a name defined elsewhere in the
# project, so do not "correct" it here without checking.
loss = {
    'MutiTaskSigmoidFocalLoss': {
        'weight': 1.0,
        'task_key': 'category',
        'pred_keys': score_keys,
        'gt_keys': gt_keys,
        'target_tasks': target_tasks,
        # Ten entries, the same length as score_keys / gt_keys /
        # target_tasks (five main heads, then five aux heads); presumably
        # matched by position -- confirm in the loss implementation.
        'weights': [1.0, 1.5, 1.0, 1.0, 1.0, 0.25, 0.5, 0.25, 0.25, 0.25],
        # data_num = dict(action = [100, 100, 100, 24, 100, 34, 3, 100, 100], pose = [98, 517, 11, 35])
    },
}


# Arguments for metric computation. Only the first five entries of gt_keys
# and target_tasks are used: those lists hold the five main-task entries
# followed by a repeated copy of them, while pred_keys has five entries.
metric_args = {
    'pred_key': pred_keys,
    'gt_key': gt_keys[:5],
    'target_tasks': target_tasks[:5],
    'task_key': 'category',
}


# Sample keys to collect: the two heatmap inputs read by the backbones,
# followed by the five ground-truth label keys.
collect_keys = [
    'pose_heatmap_for_action',
    'pose_heatmap_for_pose',
    'gt_action_upper',
    'gt_action_lower',
    'gt_pose',
    'gt_hand',
    'gt_foot',
]


# Dataset loader configuration.
data_loader = {
    'type': 'ActionDatasetLoader_mtml',
    # Machine-specific absolute path to the dataset split.
    'data_folder': '/media/safemotion/HDD5/pjm_test/action_train_dataset_2023/action_mtml_1st_split',
    # Per-task class counts; these equal the num_classes values of the
    # corresponding heads in model['head'].
    'category_info': {
        'action_upper': 6,
        'action_lower': 12,
        'pose': 8,
        'hand': 4,
        'foot': 2,
    },
    # Separate clip lengths for the action and pose inputs.
    'clip_len_action': 20,
    'clip_len_pose': 6,
}

# Common multiplier applied to every epoch-based quantity below.
ep_mul = 20

# Training configuration.
train = {
    'num_workers': 8,
    'init_lr': 0.1,
    'batch_size': 32,
    'epochs': 100 * ep_mul,
    'optimizer': 'SGD',
    'optimizer_args': {
        'momentum': 0.9,
        'nesterov': True,
        'weight_decay': 0.0001,
    },
    'scheduler': 'CosineAnnealingLR',
    # 'scheduler': 'StepLR',
    # NOTE(review): T_max (30 * ep_mul = 600) is shorter than epochs
    # (100 * ep_mul = 2000). If this name maps to
    # torch.optim.lr_scheduler.CosineAnnealingLR, the learning rate follows
    # a periodic cosine and rises again after T_max -- confirm this is
    # intended.
    'scheduler_args': {'T_max': 30 * ep_mul, 'eta_min': 0},
    # NOTE(review): the last two milestones (100 * ep_mul and 130 * ep_mul)
    # are not below `epochs`, so they may never take effect -- confirm
    # against the code that consumes adjust_lr_epoch / adjust_lr_rate.
    'adjust_lr_epoch': [10 * ep_mul, 50 * ep_mul, 100 * ep_mul, 130 * ep_mul],
    'adjust_lr_rate': [0.5, 0.1, 0.1, 0.1],
    'val_interval': 1,

    'update_loss_weight': False,
    'update_loss_weight_interval': 20,
    'base_weight': 0.5,

    'pretrained': None,  # '/media/safemotion/HDD5/pjm_test/action_train_test/9.pth'
    'save_root': '/media/safemotion/HDD5/pjm_test/action_train_result/action_cat_mlp_fl_3rd',
}


# Test-time configuration; both fields are left unset (None / empty string)
# in this file.
test = {
    'model_path': None,
    'save_root': '',
}