def verify_results(cfg, results):
    """
    Check evaluation results against the expected values listed in the config.

    Each entry of ``cfg.TEST.EXPECTED_RESULTS`` is unpacked as
    ``(task, metric, expected, tolerance)``. An entry passes when
    ``results[task][metric]`` exists, is finite, and differs from
    ``expected`` by at most ``tolerance``.

    Args:
        cfg: config object exposing ``cfg.TEST.EXPECTED_RESULTS``.
        results (OrderedDict[dict]): task_name -> {metric -> score}

    Returns:
        bool: True when there is nothing to verify or every entry passes.
        On failure the function logs the expected and actual results and
        calls ``sys.exit(1)``, so it does not return normally.
    """
    expected_results = cfg.TEST.EXPECTED_RESULTS
    if not expected_results:
        return True

    ok = True
    for task, metric, expected, tolerance in expected_results:
        # A missing task or metric is a verification failure, not a crash:
        # it must go through the same reporting / exit path as a bad score.
        actual = results.get(task, {}).get(metric)
        if actual is None or not np.isfinite(actual):
            ok = False
            continue
        if abs(actual - expected) > tolerance:
            ok = False

    logger = logging.getLogger(__name__)
    if not ok:
        logger.error("Result verification failed!")
        logger.error("Expected Results: %s", expected_results)
        logger.error("Actual Results: %s", pprint.pformat(results))

        sys.exit(1)
    else:
        logger.info("Results verification passed.")
    return ok
71
72