d0/d40/action__recognition__runner_8py_source.html

import torch

import torch.nn as nn

import numpy as np

from smaction.builder.model_builder import build_action_model


from smaction.datasets.transform.pose_transform import pose_sampling, pose_compact, pose_resize, make_pose_heatmap

from smaction.utils.transforms import *

import smaction.utils.action_utils as utils

import copy


# TODO: split this runner into separate skeleton, image, and fusion versions


class ActionRecognitionRunner(nn.Module):
    """Run action recognition from skeleton (and, eventually, image) features.

    The runner owns a dictionary of backbone networks, a dictionary of head
    networks and an optional fusion network, all created through
    ``build_action_model``.

    TODO: support image features, and restructure inference so that the
    fusion module is no longer managed separately (merge it into the
    backbone side).

    Args:
        backbone (dict): maps a backbone name to the config handed to
            ``build_action_model``.
        head (dict): maps a head name to the config handed to
            ``build_action_model``.
        fusion (dict, optional): config of the fusion model; no fusion
            module is built when ``None``.
        predict_keys (dict[str, str], optional): maps an output name to the
            key of a head score. ``inference`` stores the argmax over
            dimension 1 of that score under the output name. The value is
            iterated with ``.items()``, so it has to be a mapping (not a
            plain list of keys).
    """

    # Value passed as ``scale`` to ``pose_resize`` before heatmaps are built.
    HEATMAP_SCALE = (64, 64)

    # Number of frames cut out for the pose backbone when the runner does not
    # have exactly one backbone.
    POSE_CLIP_LEN = 6

    def __init__(self, backbone, head, fusion=None, predict_keys=None):
        super().__init__()

        # Insert one entry at a time so the registration order follows the
        # iteration order of the incoming config dicts.
        self.backbone = nn.ModuleDict()
        for key, cfg in backbone.items():
            self.backbone[key] = build_action_model(cfg)

        self.head = nn.ModuleDict()
        for key, cfg in head.items():
            self.head[key] = build_action_model(cfg)

        self.fusion = None
        if fusion is not None:
            self.fusion = build_action_model(fusion)

        self.predict_keys = {} if predict_keys is None else predict_keys

    def get_module_names(self):
        """Return the names of the module groups this runner holds.

        Returns:
            list[str]: always ``'backbone'`` and ``'head'``, followed by
            ``'fusion'`` when a fusion module was built.
        """
        modules = ['backbone', 'head']
        if self.fusion is not None:
            modules.append('fusion')
        return modules

    @staticmethod
    def _center_window(total, length):
        """Return ``(start, end)`` of a centred window of ``length`` frames.

        The start is clamped at 0: without the clamp, ``total < length``
        yields a negative start, which NumPy slicing interprets as an offset
        from the end and silently selects the wrong frames.
        """
        start = max((total - length) // 2, 0)
        return start, min(start + length, total)

    @staticmethod
    def _to_batched_tensor(array, device):
        """Convert a NumPy array to a tensor on ``device`` with a leading batch axis."""
        return torch.from_numpy(array).to(device).unsqueeze(0)

    def make_model_input(self, data_list, clip_len=20, device='cuda:0'):
        """Build the backbone input from a list of per-frame annotations.

        Args:
            data_list (list[dict]): raw per-frame data. Each entry provides
                ``'keypoints'`` (np.array): skeleton coordinates (x, y) and
                score; documented upstream as shape (17, 3).
                TODO -> image_features (np.array or Tensor): image features.
            clip_len (int): number of frames sampled from ``data_list``
                (forwarded to ``pose_sampling``).
            device (str): device the heatmap tensors are moved to.

        Returns:
            dict: the sample dictionary produced by the pose helpers, with
            the model inputs added:
                * exactly one backbone: ``'pose_heatmap'`` (Tensor);
                * otherwise: ``'pose_heatmap_for_action'`` and
                  ``'pose_heatmap_for_pose'`` (Tensors).
            Shapes are documented upstream as (1, 17, clip_len, 64, 64) and
            (1, 17, 6, 64, 64) respectively -- not verifiable from this
            block alone.
            TODO -> image_features (Tensor): image features, (1, C, clip_len, 1).
        """
        # TODO: handle input preparation when several backbones are used.
        # TODO: handle image features when present (concatenate, move to device).
        # TODO: generalise into a configurable preprocessing pipeline
        #       (see the mmlab pipelines for reference).
        sample = {}

        # The extra outer list adds a leading axis in front of the frame axis.
        kps = np.array([[anno['keypoints'] for anno in data_list]])
        sample['keypoint'] = kps[..., :2]
        sample['keypoint_score'] = kps[..., 2]

        sample = pose_sampling(sample, clip_len=clip_len)  # frame sampling
        sample = pose_compact(sample)  # coordinate range check
        sample = pose_resize(sample, scale=self.HEATMAP_SCALE)  # normalisation

        if len(self.backbone) == 1:
            # Single backbone: render the heatmap, then move it to the device
            # with a batch axis in front.
            sample = make_pose_heatmap(sample)
            sample['pose_heatmap'] = self._to_batched_tensor(
                sample['pose_heatmap'], device)
            return sample

        # Otherwise an additional, shorter clip is prepared for the pose
        # backbone, taken from the centre of the sampled frames.
        sample_pose = copy.deepcopy(sample)
        frames = sample_pose['keypoint'].shape[1]  # number of sampled frames
        start, end = self._center_window(frames, self.POSE_CLIP_LEN)
        sample_pose['keypoint'] = sample_pose['keypoint'][:, start:end, :, :]
        sample_pose['keypoint_score'] = sample_pose['keypoint_score'][:, start:end, :]
        sample_pose = pose_compact(sample_pose)  # coordinate range check
        sample_pose = pose_resize(sample_pose, scale=self.HEATMAP_SCALE)  # normalisation

        # Render both heatmaps and expose them under separate keys.
        sample = make_pose_heatmap(sample)
        sample_pose = make_pose_heatmap(sample_pose)
        sample['pose_heatmap_for_action'] = self._to_batched_tensor(
            sample['pose_heatmap'], device)
        sample['pose_heatmap_for_pose'] = self._to_batched_tensor(
            sample_pose['pose_heatmap'], device)

        return sample

    def run_recognizer(self, data_list, k=60, clip_len=20, use_valid=True, device='cuda:0'):
        """Validate the raw data, build the model input and run inference.

        Args:
            data_list (list[dict]): raw per-frame data. Each entry provides
                ``'keypoints'`` (np.array): skeleton coordinates (x, y) and
                score, documented upstream as shape (17, 3), and
                ``'frame_id'`` (int): frame number.
                TODO -> image_features (np.array or Tensor): image features.
            k (int): frame span used for recognition; forwarded to
                ``utils.check_valid``.
            clip_len (int): number of frames sampled from ``data_list``.
            use_valid (bool): whether to run the validity check.
            device (str): device used for inference.

        Returns:
            dict | None: scores and predicted classes from ``inference``, or
            ``None`` when the validity check is enabled and fails.
        """
        # TODO: needs rework so that a dual backbone can be used.

        if use_valid and not utils.check_valid(data_list, k):
            return None

        # Per the original note: frames where detection failed receive a dummy
        # pose copied from the previous data.
        data_list = utils.insert_dummy_pose(data_list)

        sample_dict = self.make_model_input(data_list, clip_len, device=device)

        # Gradients are not needed for pure inference.
        with torch.no_grad():
            result = self.inference(sample_dict)

        return result

    def inference(self, sample_dict):
        """Run backbones, optional fusion and heads, then derive predictions.

        Args:
            sample_dict (dict): input handed unchanged to every backbone.

        Returns:
            dict: one entry per ``predict_keys`` item (argmax over dimension 1
            of the referenced score) followed by every head score under its
            head name. A head score whose name equals a prediction name
            replaces that prediction.
        """
        feats = {}
        for key, net in self.backbone.items():
            feats[key] = net(sample_dict)

        if self.fusion is not None:
            feats['fusion'] = self.fusion(feats)

        # Every head receives the complete feature dictionary.
        scores = {}
        for key, net in self.head.items():
            scores[key] = net(feats)

        preds = {}
        for key, score_key in self.predict_keys.items():
            preds[key] = scores[score_key].argmax(dim=1)

        preds.update(scores)
        return preds

    def forward(self, sample_dict):
        """Alias of ``inference`` so the runner can be called like any module."""
        return self.inference(sample_dict)


action_recognition_runner.ActionRecognitionRunner
Definition action_recognition_runner.py:12

action_recognition_runner.ActionRecognitionRunner.predict_keys
predict_keys
Definition action_recognition_runner.py:37

action_recognition_runner.ActionRecognitionRunner.inference
inference(self, sample_dict)
Definition action_recognition_runner.py:148

action_recognition_runner.ActionRecognitionRunner.forward
forward(self, sample_dict)
Definition action_recognition_runner.py:174

action_recognition_runner.ActionRecognitionRunner.head
head
Definition action_recognition_runner.py:29

action_recognition_runner.ActionRecognitionRunner.make_model_input
make_model_input(self, data_list, clip_len=20, device='cuda:0')
Definition action_recognition_runner.py:51

action_recognition_runner.ActionRecognitionRunner.backbone
backbone
Definition action_recognition_runner.py:25

action_recognition_runner.ActionRecognitionRunner.get_module_names
get_module_names(self)
Definition action_recognition_runner.py:39

action_recognition_runner.ActionRecognitionRunner.run_recognizer
run_recognizer(self, data_list, k=60, clip_len=20, use_valid=True, device='cuda:0')
Definition action_recognition_runner.py:113

action_recognition_runner.ActionRecognitionRunner.__init__
__init__(self, backbone, head, fusion=None, predict_keys=None)
Definition action_recognition_runner.py:22

action_recognition_runner.ActionRecognitionRunner.fusion
fusion
Definition action_recognition_runner.py:33

pose_transform

transforms

torch.nn