triplet_sampler.py — identity-balanced triplet samplers (fastreid data loading).
1# encoding: utf-8
2"""
3@author: liaoxingyu
4@contact: liaoxingyu2@jd.com
5"""
6
7import copy
8import itertools
9from collections import defaultdict
10from typing import Optional
11
12import numpy as np
13from torch.utils.data.sampler import Sampler
14
15from fastreid.utils import comm
16
17
def no_index(a, b):
    """Return the indices ``i`` of list ``a`` for which ``a[i] != b``."""
    assert isinstance(a, list)
    return [idx for idx, item in enumerate(a) if item != b]
21
22
def __init__(self, data_source: list, batch_size: int, num_instances: int, seed: Optional[int] = None):
    """Index the dataset by identity and camera for balanced sampling.

    Args:
        data_source: list of ``(img_path, pid, camid)`` records
            (the original ``str`` annotation was wrong — the value is
            enumerated and indexed below).
        batch_size: number of examples per batch.
        num_instances: instances drawn per identity within a batch.
        seed: RNG seed; when ``None`` a seed shared across distributed
            workers is generated so every rank shuffles identically.
    """
    self.data_source = data_source
    self.batch_size = batch_size
    self.num_instances = num_instances
    # Number of distinct identities contributing to each batch.
    self.num_pids_per_batch = batch_size // self.num_instances

    # NOTE(review): index_pid is declared defaultdict(list) but is only
    # ever assigned scalar pids below; kept as-is to preserve behavior.
    self.index_pid = defaultdict(list)  # dataset index -> pid
    self.pid_cam = defaultdict(list)    # pid -> camids of its images
    self.pid_index = defaultdict(list)  # pid -> dataset indices of its images

    for index, info in enumerate(data_source):
        pid = info[1]
        camid = info[2]
        self.index_pid[index] = pid
        self.pid_cam[pid].append(camid)
        self.pid_index[pid].append(index)

    self.pids = sorted(list(self.pid_index.keys()))
    self.num_identities = len(self.pids)

    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)

    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
50
def __iter__(self):
    """Yield an endless stream of dataset indices, sharded across ranks."""
    # Round-robin shard: rank r takes indices r, r + world_size, r + 2*world_size, ...
    yield from itertools.islice(self._infinite_indices(), self._rank, None, self._world_size)
54
def _infinite_indices(self):
    """Endlessly yield dataset indices in camera-balanced batches.

    Each batch covers ``num_pids_per_batch`` identities. For every chosen
    anchor image, ``num_instances - 1`` companions are preferred from
    *other* cameras of the same identity, falling back to other images of
    that identity (or repeating the anchor when it is alone).

    NOTE(review): the extraction had dropped this ``def`` line (original
    line 55), leaving the body dangling under ``__iter__``; the header is
    restored here and the logic is otherwise unchanged.
    """
    np.random.seed(self._seed)
    while True:
        # Shuffle identity list
        identities = np.random.permutation(self.num_identities)

        # If remaining identities cannot be enough for a batch,
        # just drop the remaining parts
        drop_indices = self.num_identities % self.num_pids_per_batch
        if drop_indices:
            identities = identities[:-drop_indices]

        ret = []
        for kid in identities:
            # Anchor: one random image of this identity.
            i = np.random.choice(self.pid_index[self.pids[kid]])
            _, i_pid, i_cam = self.data_source[i]
            ret.append(i)
            pid_i = self.index_pid[i]
            cams = self.pid_cam[pid_i]
            index = self.pid_index[pid_i]
            # Positions (within this pid's lists) captured by another camera.
            select_cams = no_index(cams, i_cam)

            if select_cams:
                # Prefer cross-camera companions; sample with replacement
                # only when there are not enough distinct ones.
                if len(select_cams) >= self.num_instances:
                    cam_indexes = np.random.choice(select_cams, size=self.num_instances - 1, replace=False)
                else:
                    cam_indexes = np.random.choice(select_cams, size=self.num_instances - 1, replace=True)
                for kk in cam_indexes:
                    ret.append(index[kk])
            else:
                # No other camera available: fall back to other images of
                # the same identity.
                select_indexes = no_index(index, i)
                if not select_indexes:
                    # Only one image for this identity
                    ind_indexes = [0] * (self.num_instances - 1)
                elif len(select_indexes) >= self.num_instances:
                    ind_indexes = np.random.choice(select_indexes, size=self.num_instances - 1, replace=False)
                else:
                    ind_indexes = np.random.choice(select_indexes, size=self.num_instances - 1, replace=True)

                for kk in ind_indexes:
                    ret.append(index[kk])

            if len(ret) == self.batch_size:
                yield from ret
                ret = []
100
class NaiveIdentitySampler(Sampler):
    """
    Randomly sample N identities, then for each identity,
    randomly sample K instances, therefore batch size is N*K.

    Args:
    - data_source (list): list of (img_path, pid, camid).
    - num_instances (int): number of instances per identity in a batch.
    - batch_size (int): number of examples in a batch.
    """

    def __init__(self, data_source: list, batch_size: int, num_instances: int, seed: Optional[int] = None):
        """Index the dataset by identity; ``data_source`` annotation fixed
        from the erroneous ``str`` — the value is a list of records."""
        self.data_source = data_source
        self.batch_size = batch_size
        self.num_instances = num_instances
        # Number of distinct identities contributing to each batch.
        self.num_pids_per_batch = batch_size // self.num_instances

        self.index_pid = defaultdict(list)  # dataset index -> pid
        self.pid_cam = defaultdict(list)    # pid -> camids of its images
        self.pid_index = defaultdict(list)  # pid -> dataset indices of its images

        for index, info in enumerate(data_source):
            pid = info[1]
            camid = info[2]
            self.index_pid[index] = pid
            self.pid_cam[pid].append(camid)
            self.pid_index[pid].append(index)

        self.pids = sorted(list(self.pid_index.keys()))
        self.num_identities = len(self.pids)

        if seed is None:
            # Shared seed keeps the shuffle identical on every rank.
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

    def __iter__(self):
        """Yield an endless stream of indices, round-robin sharded by rank."""
        start = self._rank
        yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)

    def _infinite_indices(self):
        """Endlessly yield index batches of ``num_pids_per_batch`` identities
        with ``num_instances`` images each.

        NOTE(review): the extraction had dropped this ``def`` line (original
        line 142); the header is restored here, logic unchanged.
        """
        np.random.seed(self._seed)
        while True:
            # One "epoch": consume identities until fewer than a batch remain.
            avai_pids = copy.deepcopy(self.pids)
            batch_idxs_dict = {}

            batch_indices = []
            while len(avai_pids) >= self.num_pids_per_batch:
                selected_pids = np.random.choice(avai_pids, self.num_pids_per_batch, replace=False).tolist()
                for pid in selected_pids:
                    # Register pid in batch_idxs_dict if not
                    if pid not in batch_idxs_dict:
                        idxs = copy.deepcopy(self.pid_index[pid])
                        # Oversample with replacement when the identity has
                        # fewer images than num_instances.
                        if len(idxs) < self.num_instances:
                            idxs = np.random.choice(idxs, size=self.num_instances, replace=True).tolist()
                        np.random.shuffle(idxs)
                        batch_idxs_dict[pid] = idxs

                    avai_idxs = batch_idxs_dict[pid]
                    for _ in range(self.num_instances):
                        batch_indices.append(avai_idxs.pop(0))

                    # Identity exhausted for this epoch: retire it.
                    if len(avai_idxs) < self.num_instances:
                        avai_pids.remove(pid)

                assert len(batch_indices) == self.batch_size, f"batch indices have wrong " \
                                                              f"length with {len(batch_indices)}!"
                yield from batch_indices
                batch_indices = []
__init__(self, data_source: list, batch_size: int, num_instances: int, seed: Optional[int] = None)
__init__(self, data_source: list, batch_size: int, num_instances: int, seed: Optional[int] = None)