# Source page: de/d05/posec3d__kp__img_8py_source.html

# Model configuration for the 'ActionRecognitionRunner'.
#
# Layout:
#   backbone     - three named feature extractors, each reading one input key.
#   fusion       - a 'CatLayer' that consumes the three backbone outputs.
#   head         - five main heads fed by 'fusion' plus five auxiliary heads fed
#                  directly by a single backbone ('action_feat' / 'pose_feat').
#   predict_keys - maps each predicted-label key to the head that produces it.
model = {
    'type': 'ActionRecognitionRunner',
    'backbone': {
        # 3D backbone over the action pose heatmaps (17 input channels).
        'action_feat': {
            'type': 'ResNet3d',
            'in_channels': 17,
            'base_channels': 64,
            'stage_blocks': (4, 6, 3),
            'out_indices': (2, ),
            'spatial_strides': (2, 2, 2),
            'temporal_strides': (1, 1, 2),
            'dilations': (1, 1, 1),
            'conv1_kernel': (1, 7, 7),
            'conv1_stride_s': 1,
            'conv1_stride_t': 1,
            'pool1_stride_s': 1,
            'pool1_stride_t': 1,
            'inflate': (0, 1, 1),
            'inflate_style': '3x1x1',
            'input_key': 'pose_heatmap_for_action',
        },
        # Same architecture as 'action_feat' except for half the base width
        # and a different input key.
        'pose_feat': {
            'type': 'ResNet3d',
            'in_channels': 17,
            'base_channels': 32,
            'stage_blocks': (4, 6, 3),
            'out_indices': (2, ),
            'spatial_strides': (2, 2, 2),
            'temporal_strides': (1, 1, 2),
            'dilations': (1, 1, 1),
            'conv1_kernel': (1, 7, 7),
            'conv1_stride_s': 1,
            'conv1_stride_t': 1,
            'pool1_stride_s': 1,
            'pool1_stride_t': 1,
            'inflate': (0, 1, 1),
            'inflate_style': '3x1x1',
            'input_key': 'pose_heatmap_for_pose',
        },
        # 2D backbone over the 768-channel 'image_feature' input.
        'img_feat': {
            'type': 'ResNet2d',
            'in_channels': 768,
            'stage_blocks': [2, 2, 2, 2],
            'input_key': 'image_feature',
        },
    },
    'fusion': {
        'type': 'CatLayer',
        # Entries of 'input_key' and 'input_type' are listed in the same order.
        'input_key': ['action_feat', 'pose_feat', 'img_feat'],
        'input_type': ['3d', '3d', '2d'],
    },
    'head': {
        # ---- main heads: all read the fused feature ----
        # in_channels is written as a sum of three terms (1024 + 512 + 768);
        # presumably one term per fused backbone - TODO confirm.
        'action_upper': {
            'type': 'LinearHead',
            'in_channels': 1024 + 512 + 768,
            'num_classes': 6,
            'dropout_ratio': 0.5,
            'input_key': 'fusion',
        },
        'action_lower': {
            'type': 'LinearHead',
            'in_channels': 1024 + 512 + 768,
            'num_classes': 12,
            'dropout_ratio': 0.5,
            'input_key': 'fusion',
        },
        'pose': {
            'type': 'MLPHead',
            'in_channels': 1024 + 512 + 768,
            'num_classes': 8,
            'layer_channels': [2048],
            'dropout_ratio': 0.5,
            'input_key': 'fusion',
        },
        'hand': {
            'type': 'MLPHead',
            'in_channels': 1024 + 512 + 768,
            'num_classes': 4,
            'layer_channels': [2048],
            'dropout_ratio': 0.5,
            'input_key': 'fusion',
        },
        'foot': {
            'type': 'MLPHead',
            'in_channels': 1024 + 512 + 768,
            'num_classes': 2,
            'layer_channels': [2048],
            'dropout_ratio': 0.5,
            'input_key': 'fusion',
        },
        # ---- auxiliary heads: each reads a single backbone output ----
        'aux_action_upper': {
            'type': 'I3DHead',
            'in_channels': 1024,
            'num_classes': 6,
            'dropout_ratio': 0.5,
            'input_key': 'action_feat',
        },
        'aux_action_lower': {
            'type': 'I3DHead',
            'in_channels': 1024,
            'num_classes': 12,
            'dropout_ratio': 0.5,
            'input_key': 'action_feat',
        },
        'aux_pose': {
            'type': 'MLPHead',
            'in_channels': 512,
            'num_classes': 8,
            'layer_channels': [1024],
            'dropout_ratio': 0.5,
            'input_key': 'pose_feat',
            'input_type': '3d',
        },
        'aux_hand': {
            'type': 'MLPHead',
            'in_channels': 512,
            'num_classes': 4,
            'layer_channels': [1024],
            'dropout_ratio': 0.5,
            'input_key': 'pose_feat',
            'input_type': '3d',
        },
        'aux_foot': {
            'type': 'MLPHead',
            'in_channels': 512,
            'num_classes': 2,
            'layer_channels': [1024],
            'dropout_ratio': 0.5,
            'input_key': 'pose_feat',
            'input_type': '3d',
        },
    },
    'predict_keys': {
        # key of the predicted label = key of the head (i.e. key of the score)
        'pred_action_upper': 'action_upper',
        'pred_action_lower': 'action_lower',
        'pred_pose': 'pose',
        'pred_hand': 'hand',
        'pred_foot': 'foot',
    },
}


# Score keys, one per head: the five main heads first, then the five
# auxiliary heads.
score_keys = [
    'action_upper', 'action_lower', 'pose', 'hand', 'foot',
    'aux_action_upper', 'aux_action_lower', 'aux_pose', 'aux_hand', 'aux_foot',
]

# Predicted-label keys (main heads only).
pred_keys = [
    'pred_action_upper',
    'pred_action_lower',
    'pred_pose',
    'pred_hand',
    'pred_foot',
]

# Ground-truth keys, position-aligned with score_keys: the five labels are
# listed twice so each auxiliary score is paired with the same label as its
# main counterpart.
gt_keys = ['gt_action_upper', 'gt_action_lower', 'gt_pose', 'gt_hand', 'gt_foot'] * 2

# Task names, position-aligned with score_keys (again repeated for the
# auxiliary heads).
target_tasks = ['action_upper', 'action_lower', 'pose', 'hand', 'foot'] * 2

train_tasks = ['action_upper', 'action_lower', 'pose', 'hand', 'foot']


# Loss configuration. The per-entry 'weights' list is position-aligned with
# score_keys: 1.0 for each main head and 0.25 for each auxiliary head.
loss = {
    'MutiTaskSigmoidFocalLoss': {
        'weight': 1.0,
        'task_key': 'category',
        'pred_keys': score_keys,
        'gt_keys': gt_keys,
        'target_tasks': target_tasks,
        'weights': [1.0, 1.0, 1.0, 1.0, 1.0, 0.25, 0.25, 0.25, 0.25, 0.25],
        # data_num = dict(action = [100, 100, 100, 24, 100, 34, 3, 100, 100], pose = [98, 517, 11, 35])
    },
}


# Metric arguments: only the first five (main-head) entries of gt_keys and
# target_tasks are used, matching the five pred_keys.
metric_args = {
    'pred_key': pred_keys,
    'gt_key': gt_keys[:5],
    'target_tasks': target_tasks[:5],
    'task_key': 'category',
}


# Keys collected for each sample: the three backbone input keys followed by
# the five ground-truth label keys.
collect_keys = [
    # backbone inputs
    'pose_heatmap_for_action',
    'pose_heatmap_for_pose',
    'image_feature',
    # ground-truth labels
    'gt_action_upper',
    'gt_action_lower',
    'gt_pose',
    'gt_hand',
    'gt_foot',
]


# Dataset loader configuration.
data_loader = {
    'type': 'ActionDatasetLoader_mtml',
    'data_folder': '/media/safemotion/HDD5/pjm_test/action_train_dataset_2023/action_kp_imf_split',
    # Per-task counts; the values equal num_classes of the corresponding heads
    # in the model config (6 / 12 / 8 / 4 / 2).
    'category_info': {
        'action_upper': 6,
        'action_lower': 12,
        'pose': 8,
        'hand': 4,
        'foot': 2,
    },
    # Presumably clip lengths in frames for the action and pose inputs
    # respectively - TODO confirm against the loader.
    'clip_len_action': 20,
    'clip_len_pose': 6,
}

# Multiplier applied to every epoch-denominated setting below.
ep_mul = 20

# Training configuration.
train = {
    'num_workers': 8,
    'init_lr': 0.1,
    'batch_size': 32,
    'epochs': 100 * ep_mul,

    # Optimizer and learning-rate schedule.
    'optimizer': 'SGD',
    'optimizer_args': {'momentum': 0.9, 'nesterov': True, 'weight_decay': 0.0001},
    'scheduler': 'CosineAnnealingLR',
    # scheduler = 'StepLR',
    # NOTE(review): T_max (30 * ep_mul = 600) is smaller than epochs (2000) -
    # confirm this is intended.
    'scheduler_args': {'T_max': 30 * ep_mul, 'eta_min': 0},
    # Milestone epochs and their rates, position-aligned.
    # NOTE(review): the last milestone (130 * ep_mul = 2600) lies beyond
    # epochs (2000) - confirm it is intentionally unreachable.
    'adjust_lr_epoch': [factor * ep_mul for factor in (10, 50, 100, 130)],
    'adjust_lr_rate': [0.5, 0.1, 0.1, 0.1],
    'val_interval': 1,

    # Loss-weight updating (disabled here).
    'update_loss_weight': False,
    'update_loss_weight_interval': 20,
    'base_weight': 0.5,

    # Checkpoint input / output locations.
    'pretrained': None,  # '/media/safemotion/HDD5/pjm_test/action_train_test/9.pth',
    'save_root': '/media/safemotion/HDD5/pjm_test/action_train_result/action_kp_img',
}


# Test-time configuration; both fields are left unset here (no checkpoint path,
# empty output root).
test = {
    'model_path': None,
    'save_root': '',
}