Safemotion Lib
Loading...
Searching...
No Matches
cuhk03.py
Go to the documentation of this file.
1# encoding: utf-8
2"""
3@author: liaoxingyu
4@contact: liaoxingyu2@jd.com
5"""
6
7import json
8import os.path as osp
9
10from fastreid.data.datasets import DATASET_REGISTRY
11from fastreid.utils.file_io import PathManager
12from .bases import ImageDataset
13
14
@DATASET_REGISTRY.register()
class CUHK03(ImageDataset):
    """CUHK03 person re-identification dataset.

    Reference:
        Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014.

    URL: `<http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html#!>`_

    Dataset statistics:
        - identities: 1360.
        - images: 13164.
        - cameras: 6.
        - splits: 20 (classic).
    """
    # NOTE(review): absolute path hard-coded to a local checkout — presumably
    # intentional for this repo; confirm it should not be a relative name
    # joined onto ``root`` like the other fastreid datasets.
    dataset_dir = '/workspace/LUPerson/fast-reid/datasets/'
    dataset_url = None  # no public auto-download URL for CUHK03
    dataset_name = "cuhk03"  # prefix used to namespace pids/camids of the train set
34 def __init__(self, root='datasets', split_id=0, cuhk03_labeled=True, cuhk03_classic_split=False, **kwargs):
35 self.root = root
37
38 self.data_dir = osp.join(self.dataset_dirdataset_dir, 'cuhk03-np')
39 self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')
40
41 self.imgs_detected_dir = osp.join(self.dataset_dirdataset_dir, 'images_detected')
42 self.imgs_labeled_dir = osp.join(self.dataset_dirdataset_dir, 'images_labeled')
43
44 self.split_classic_det_json_path = osp.join(self.dataset_dirdataset_dir, 'splits_classic_detected.json')
45 self.split_classic_lab_json_path = osp.join(self.dataset_dirdataset_dir, 'splits_classic_labeled.json')
46
47 self.split_new_det_json_path = osp.join(self.dataset_dirdataset_dir, 'splits_new_detected.json')
48 self.split_new_lab_json_path = osp.join(self.dataset_dirdataset_dir, 'splits_new_labeled.json')
49
50 self.split_new_det_mat_path = osp.join(self.dataset_dirdataset_dir, 'cuhk03_new_protocol_config_detected.mat')
51 self.split_new_lab_mat_path = osp.join(self.dataset_dirdataset_dir, 'cuhk03_new_protocol_config_labeled.mat')
52
53 required_files = [
55 self.data_dir,
56 self.raw_mat_path,
59 ]
60 self.check_before_run(required_files)
61
62 self.preprocess_split()
63
64 if cuhk03_labeled:
65 split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
66 else:
67 split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path
68
69 with PathManager.open(split_path) as f:
70 splits = json.load(f)
71 assert split_id < len(splits), 'Condition split_id ({}) < len(splits) ({}) is false'.format(split_id,
72 len(splits))
73 split = splits[split_id]
74
75 train = split['train']
76 tmp_train = []
77 for img_path, pid, camid in train:
78 new_pid = self.dataset_name + "_" + str(pid)
79 new_camid = self.dataset_name + "_" + str(camid)
80 tmp_train.append((img_path, new_pid, new_camid))
81 train = tmp_train
82 del tmp_train
83 query = split['query']
84 gallery = split['gallery']
85
86 super(CUHK03, self).__init__(train, query, gallery, **kwargs)
87
89 # This function is a bit complex and ugly, what it does is
90 # 1. extract data from cuhk-03.mat and save as png images
91 # 2. create 20 classic splits (Li et al. CVPR'14)
92 # 3. create new split (Zhong et al. CVPR'17)
93 if osp.exists(self.imgs_labeled_dir) \
94 and osp.exists(self.imgs_detected_dir) \
95 and osp.exists(self.split_classic_det_json_path) \
96 and osp.exists(self.split_classic_lab_json_path) \
97 and osp.exists(self.split_new_det_json_path) \
98 and osp.exists(self.split_new_lab_json_path):
99 return
100
101 import h5py
102 from imageio import imwrite
103 from scipy.io import loadmat
104
105 PathManager.mkdirs(self.imgs_detected_dir)
106 PathManager.mkdirs(self.imgs_labeled_dir)
107
108 print('Extract image data from "{}" and save as png'.format(self.raw_mat_path))
109 mat = h5py.File(self.raw_mat_path, 'r')
110
111 def _deref(ref):
112 return mat[ref][:].T
113
114 def _process_images(img_refs, campid, pid, save_dir):
115 img_paths = [] # Note: some persons only have images for one view
116 for imgid, img_ref in enumerate(img_refs):
117 img = _deref(img_ref)
118 if img.size == 0 or img.ndim < 3:
119 continue # skip empty cell
120 # images are saved with the following format, index-1 (ensure uniqueness)
121 # campid: index of camera pair (1-5)
122 # pid: index of person in 'campid'-th camera pair
123 # viewid: index of view, {1, 2}
124 # imgid: index of image, (1-10)
125 viewid = 1 if imgid < 5 else 2
126 img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid + 1, pid + 1, viewid, imgid + 1)
127 img_path = osp.join(save_dir, img_name)
128 if not osp.isfile(img_path):
129 imwrite(img_path, img)
130 img_paths.append(img_path)
131 return img_paths
132
133 def _extract_img(image_type):
134 print('Processing {} images ...'.format(image_type))
135 meta_data = []
136 imgs_dir = self.imgs_detected_dir if image_type == 'detected' else self.imgs_labeled_dir
137 for campid, camp_ref in enumerate(mat[image_type][0]):
138 camp = _deref(camp_ref)
139 num_pids = camp.shape[0]
140 for pid in range(num_pids):
141 img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir)
142 assert len(img_paths) > 0, 'campid{}-pid{} has no images'.format(campid, pid)
143 meta_data.append((campid + 1, pid + 1, img_paths))
144 print('- done camera pair {} with {} identities'.format(campid + 1, num_pids))
145 return meta_data
146
147 meta_detected = _extract_img('detected')
148 meta_labeled = _extract_img('labeled')
149
150 def _extract_classic_split(meta_data, test_split):
151 train, test = [], []
152 num_train_pids, num_test_pids = 0, 0
153 num_train_imgs, num_test_imgs = 0, 0
154 for i, (campid, pid, img_paths) in enumerate(meta_data):
155
156 if [campid, pid] in test_split:
157 for img_path in img_paths:
158 camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based
159 test.append((img_path, num_test_pids, camid))
160 num_test_pids += 1
161 num_test_imgs += len(img_paths)
162 else:
163 for img_path in img_paths:
164 camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based
165 train.append((img_path, num_train_pids, camid))
166 num_train_pids += 1
167 num_train_imgs += len(img_paths)
168 return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs
169
170 print('Creating classic splits (# = 20) ...')
171 splits_classic_det, splits_classic_lab = [], []
172 for split_ref in mat['testsets'][0]:
173 test_split = _deref(split_ref).tolist()
174
175 # create split for detected images
176 train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
177 _extract_classic_split(meta_detected, test_split)
178 splits_classic_det.append({
179 'train': train,
180 'query': test,
181 'gallery': test,
182 'num_train_pids': num_train_pids,
183 'num_train_imgs': num_train_imgs,
184 'num_query_pids': num_test_pids,
185 'num_query_imgs': num_test_imgs,
186 'num_gallery_pids': num_test_pids,
187 'num_gallery_imgs': num_test_imgs
188 })
189
190 # create split for labeled images
191 train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
192 _extract_classic_split(meta_labeled, test_split)
193 splits_classic_lab.append({
194 'train': train,
195 'query': test,
196 'gallery': test,
197 'num_train_pids': num_train_pids,
198 'num_train_imgs': num_train_imgs,
199 'num_query_pids': num_test_pids,
200 'num_query_imgs': num_test_imgs,
201 'num_gallery_pids': num_test_pids,
202 'num_gallery_imgs': num_test_imgs
203 })
204
205 with PathManager.open(self.split_classic_det_json_path, 'w') as f:
206 json.dump(splits_classic_det, f, indent=4, separators=(',', ': '))
207 with PathManager.open(self.split_classic_lab_json_path, 'w') as f:
208 json.dump(splits_classic_lab, f, indent=4, separators=(',', ': '))
209
210 def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
211 tmp_set = []
212 unique_pids = set()
213 for idx in idxs:
214 img_name = filelist[idx][0]
215 camid = int(img_name.split('_')[2]) - 1 # make it 0-based
216 pid = pids[idx]
217 if relabel:
218 pid = pid2label[pid]
219 img_path = osp.join(img_dir, img_name)
220 tmp_set.append((img_path, int(pid), camid))
221 unique_pids.add(pid)
222 return tmp_set, len(unique_pids), len(idxs)
223
224 def _extract_new_split(split_dict, img_dir):
225 train_idxs = split_dict['train_idx'].flatten() - 1 # index-0
226 pids = split_dict['labels'].flatten()
227 train_pids = set(pids[train_idxs])
228 pid2label = {pid: label for label, pid in enumerate(train_pids)}
229 query_idxs = split_dict['query_idx'].flatten() - 1
230 gallery_idxs = split_dict['gallery_idx'].flatten() - 1
231 filelist = split_dict['filelist'].flatten()
232 train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True)
233 query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False)
234 gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False)
235 return train_info, query_info, gallery_info
236
237 print('Creating new split for detected images (767/700) ...')
238 train_info, query_info, gallery_info = _extract_new_split(
239 loadmat(self.split_new_det_mat_path),
241 )
242 split = [{
243 'train': train_info[0],
244 'query': query_info[0],
245 'gallery': gallery_info[0],
246 'num_train_pids': train_info[1],
247 'num_train_imgs': train_info[2],
248 'num_query_pids': query_info[1],
249 'num_query_imgs': query_info[2],
250 'num_gallery_pids': gallery_info[1],
251 'num_gallery_imgs': gallery_info[2]
252 }]
253
254 with PathManager.open(self.split_new_det_json_path, 'w') as f:
255 json.dump(split, f, indent=4, separators=(',', ': '))
256
257 print('Creating new split for labeled images (767/700) ...')
258 train_info, query_info, gallery_info = _extract_new_split(
259 loadmat(self.split_new_lab_mat_path),
261 )
262 split = [{
263 'train': train_info[0],
264 'query': query_info[0],
265 'gallery': gallery_info[0],
266 'num_train_pids': train_info[1],
267 'num_train_imgs': train_info[2],
268 'num_query_pids': query_info[1],
269 'num_query_imgs': query_info[2],
270 'num_gallery_pids': gallery_info[1],
271 'num_gallery_imgs': gallery_info[2]
272 }]
273 with PathManager.open(self.split_new_lab_json_path, 'w') as f:
274 json.dump(split, f, indent=4, separators=(',', ': '))
check_before_run(self, required_files)
Definition bases.py:113
__init__(self, root='datasets', split_id=0, cuhk03_labeled=True, cuhk03_classic_split=False, **kwargs)
Definition cuhk03.py:34