hooks.py
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import datetime
import itertools
import logging
import os
import tempfile
import time
from collections import Counter

import torch
from torch import nn
from torch.nn.parallel import DistributedDataParallel

from fastreid.evaluation.testing import flatten_results_dict
from fastreid.solver import optim
from fastreid.utils import comm
from fastreid.utils.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
from fastreid.utils.events import EventStorage, EventWriter
from fastreid.utils.file_io import PathManager
from fastreid.utils.precision_bn import update_bn_stats, get_bn_modules
from fastreid.utils.timer import Timer
from .train_loop import HookBase

__all__ = [
    "CallbackHook",
    "IterationTimer",
    "PeriodicWriter",
    "PeriodicCheckpointer",
    "LRScheduler",
    "AutogradProfiler",
    "EvalHook",
    "PreciseBN",
    "FreezeLayer",
]

"""
Implement some common hooks.
"""


class CallbackHook(HookBase):
    """
    Create a hook using callback functions provided by the user.
    """

    def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None):
        """
        Each argument is a function that takes one argument: the trainer.
        """
        self._before_train = before_train
        self._before_step = before_step
        self._after_step = after_step
        self._after_train = after_train

    def before_train(self):
        if self._before_train:
            self._before_train(self.trainer)

    def after_train(self):
        if self._after_train:
            self._after_train(self.trainer)
        # The functions may be closures that hold a reference to the trainer.
        # Therefore, delete them to avoid a circular reference.
        del self._before_train, self._after_train
        del self._before_step, self._after_step

    def before_step(self):
        if self._before_step:
            self._before_step(self.trainer)

    def after_step(self):
        if self._after_step:
            self._after_step(self.trainer)
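
# Usage sketch (illustration only, not part of the original file): CallbackHook lets you
# hook into the training loop without subclassing HookBase. The `trainer` object and its
# `register_hooks` / `storage` attributes below are assumptions about the surrounding
# train loop, not defined in this file.
#
#   trainer.register_hooks([
#       CallbackHook(
#           before_train=lambda t: print("training starts at iter", t.iter),
#           after_step=lambda t: t.storage.put_scalar("custom/iter", t.iter),
#       )
#   ])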
79 """
80 Track the time spent for each iteration (each run_step call in the trainer).
81 Print a summary in the end of training.
82 This hook uses the time between the call to its :meth:`before_step`
83 and :meth:`after_step` methods.
84 Under the convention that :meth:`before_step` of all hooks should only
85 take negligible amount of time, the :class:`IterationTimer` hook should be
86 placed at the beginning of the list of hooks to obtain accurate timing.
87 """
88
89 def __init__(self, warmup_iter=3):
90 """
91 Args:
92 warmup_iter (int): the number of iterations at the beginning to exclude
93 from timing.
94 """
95 self._warmup_iter = warmup_iter
97
98 def before_train(self):
99 self._start_time = time.perf_counter()
101 self._total_timer.pause()
102
103 def after_train(self):
104 logger = logging.getLogger(__name__)
105 total_time = time.perf_counter() - self._start_time
106 total_time_minus_hooks = self._total_timer.seconds()
107 hook_time = total_time - total_time_minus_hooks
108
109 num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter
110
111 if num_iter > 0 and total_time_minus_hooks > 0:
112 # Speed is meaningful only after warmup
113 # NOTE this format is parsed by grep in some scripts
114 logger.info(
115 "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
116 num_iter,
117 str(datetime.timedelta(seconds=int(total_time_minus_hooks))),
118 total_time_minus_hooks / num_iter,
119 )
120 )
121
122 logger.info(
123 "Total training time: {} ({} on hooks)".format(
124 str(datetime.timedelta(seconds=int(total_time))),
125 str(datetime.timedelta(seconds=int(hook_time))),
126 )
127 )
128
129 def before_step(self):
130 self._step_timer.reset()
131 self._total_timer.resume()
132
133 def after_step(self):
134 # +1 because we're in after_step
135 iter_done = self.trainer.iter - self.trainer.start_iter + 1
136 if iter_done >= self._warmup_iter:
137 sec = self._step_timer.seconds()
138 self.trainer.storage.put_scalars(time=sec)
139 else:
140 self._start_time = time.perf_counter()
141 self._total_timer.reset()
142
143 self._total_timer.pause()
144
145
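
# Ordering sketch (illustration only): because IterationTimer measures the time between its
# own before_step and after_step calls, it should be registered first, so that time spent in
# other hooks' after_step (e.g., writing or evaluation) is not counted as iteration time.
# `trainer.register_hooks` and the other hook arguments below are assumed wiring, not part
# of this file.
#
#   trainer.register_hooks([
#       IterationTimer(warmup_iter=3),          # first, per the docstring above
#       LRScheduler(optimizer, scheduler),
#       PeriodicWriter(writers, period=20),
#   ])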


class PeriodicWriter(HookBase):
    """
    Write events to EventStorage periodically.
    It is executed every ``period`` iterations and after the last iteration.
    """

    def __init__(self, writers, period=20):
        """
        Args:
            writers (list[EventWriter]): a list of EventWriter objects
            period (int):
        """
        self._writers = writers
        for w in writers:
            assert isinstance(w, EventWriter), w
        self._period = period

    def after_step(self):
        if (self.trainer.iter + 1) % self._period == 0 or (
                self.trainer.iter == self.trainer.max_iter - 1
        ):
            for writer in self._writers:
                writer.write()

    def after_train(self):
        for writer in self._writers:
            writer.close()
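
# Usage sketch (illustration only): the writers must be EventWriter instances. The writer
# classes named below are assumed to come from fastreid.utils.events in the surrounding
# project; they are not defined in this file.
#
#   from fastreid.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter
#
#   writers = [
#       CommonMetricPrinter(max_iter),
#       JSONWriter(os.path.join(output_dir, "metrics.json")),
#       TensorboardXWriter(output_dir),
#   ]
#   trainer.register_hooks([PeriodicWriter(writers, period=20)])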


class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase):
    """
    Same as :class:`fastreid.utils.checkpoint.PeriodicCheckpointer`, but as a hook.
    Note that when used as a hook,
    it is unable to save additional data other than what's defined
    by the given `checkpointer`.
    It is executed every ``period`` iterations and after the last iteration.
    """

    def before_train(self):
        self.max_iter = self.trainer.max_iter

    def after_step(self):
        # No way to use **kwargs
        self.step(self.trainer.iter)
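
# Usage sketch (illustration only): the hook wraps an existing checkpointer object. The
# `Checkpointer` constructor call and argument names below are assumptions about the
# fastreid.utils.checkpoint API, shown only to indicate the intended wiring.
#
#   from fastreid.utils.checkpoint import Checkpointer
#
#   checkpointer = Checkpointer(model, save_dir=output_dir, optimizer=optimizer)
#   trainer.register_hooks([PeriodicCheckpointer(checkpointer, period=5000)])
#   # max_iter is filled in from the trainer in before_train().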


class LRScheduler(HookBase):
    """
    A hook which executes a torch builtin LR scheduler and summarizes the LR.
    It is executed after every iteration.
    """

    def __init__(self, optimizer, scheduler):
        """
        Args:
            optimizer (torch.optim.Optimizer):
            scheduler (torch.optim._LRScheduler):
        """
        self._optimizer = optimizer
        self._scheduler = scheduler

        # NOTE: some heuristics on which LR to summarize
        # summarize the param group with the most parameters
        largest_group = max(len(g["params"]) for g in optimizer.param_groups)

        if largest_group == 1:
            # If all groups have one parameter,
            # then find the most common initial LR, and use it for the summary
            lr_count = Counter([g["lr"] for g in optimizer.param_groups])
            lr = lr_count.most_common()[0][0]
            for i, g in enumerate(optimizer.param_groups):
                if g["lr"] == lr:
                    self._best_param_group_id = i
                    break
        else:
            for i, g in enumerate(optimizer.param_groups):
                if len(g["params"]) == largest_group:
                    self._best_param_group_id = i
                    break

    def after_step(self):
        lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
        self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
        self._scheduler.step()
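
# Usage sketch (illustration only): the hook records the LR of one representative param
# group and steps the scheduler once per iteration, so the scheduler should be built for
# per-iteration (not per-epoch) stepping. The milestones below are illustrative values.
#
#   optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#   scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30000, 45000])
#   trainer.register_hooks([LRScheduler(optimizer, scheduler)])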


class AutogradProfiler(HookBase):
    """
    A hook which runs `torch.autograd.profiler.profile`.

    Examples:
    .. code-block:: python

        hooks.AutogradProfiler(
            lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR
        )

    The above example will run the profiler for iterations 10~20 and dump
    results to ``OUTPUT_DIR``. We do not profile the first few iterations
    because they are typically slower than the rest.
    The result files can be loaded in the ``chrome://tracing`` page in the Chrome browser.

    Note:
        When used together with NCCL on older versions of GPUs,
        the autograd profiler may cause deadlock because it unnecessarily allocates
        memory on every device it sees. The memory management calls, if
        interleaved with NCCL calls, lead to deadlock on GPUs that do not
        support ``cudaLaunchCooperativeKernelMultiDevice``.
    """

    def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
        """
        Args:
            enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
                and returns whether to enable the profiler.
                It will be called once every step, and can be used to select which steps to profile.
            output_dir (str): the output directory to dump tracing files.
            use_cuda (bool): same as in `torch.autograd.profiler.profile`.
        """
        self._enable_predicate = enable_predicate
        self._use_cuda = use_cuda
        self._output_dir = output_dir

    def before_step(self):
        if self._enable_predicate(self.trainer):
            self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
            self._profiler.__enter__()
        else:
            self._profiler = None

    def after_step(self):
        if self._profiler is None:
            return
        self._profiler.__exit__(None, None, None)
        out_file = os.path.join(
            self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
        )
        if "://" not in out_file:
            self._profiler.export_chrome_trace(out_file)
        else:
            # Support non-posix filesystems
            with tempfile.TemporaryDirectory(prefix="fastreid_profiler") as d:
                tmp_file = os.path.join(d, "tmp.json")
                self._profiler.export_chrome_trace(tmp_file)
                with open(tmp_file) as f:
                    content = f.read()
            with PathManager.open(out_file, "w") as f:
                f.write(content)


class EvalHook(HookBase):
    """
    Run an evaluation function periodically, and at the end of training.
    It is executed every ``eval_period`` iterations and after the last iteration.
    """

    def __init__(self, eval_period, eval_function):
        """
        Args:
            eval_period (int): the period to run `eval_function`.
            eval_function (callable): a function which takes no arguments, and
                returns a nested dict of evaluation metrics.

        Note:
            This hook must be enabled in all workers or in none of them.
            If you would like only certain workers to perform evaluation,
            give the other workers a no-op function (`eval_function=lambda: None`).
        """
        self._period = eval_period
        self._func = eval_function

    def _do_eval(self):
        results = self._func()

        if results:
            assert isinstance(
                results, dict
            ), "Eval function must return a dict. Got {} instead.".format(results)

            flattened_results = flatten_results_dict(results)
            for k, v in flattened_results.items():
                try:
                    v = float(v)
                except Exception:
                    raise ValueError(
                        "[EvalHook] eval_function should return a nested dict of float. "
                        "Got '{}: {}' instead.".format(k, v)
                    )
            self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)

        # Remove the extra memory cache of the main process due to evaluation
        torch.cuda.empty_cache()

    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_eval()
        # Evaluation may take different time among workers.
        # A barrier makes them start the next iteration together.
        comm.synchronize()

    def after_train(self):
        # func is likely a closure that holds a reference to the trainer;
        # therefore we delete it to avoid a circular reference at the end.
        del self._func
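
# Usage sketch (illustration only): eval_function takes no arguments and returns a nested
# dict of floats; flatten_results_dict joins the nested keys before the values are written
# to the trainer's storage. The dataset/metric names below are illustrative, not produced
# by this file.
#
#   def eval_function():
#       return {"Market1501": {"Rank-1": 0.95, "mAP": 0.88}}
#
#   trainer.register_hooks([EvalHook(eval_period=5000, eval_function=eval_function)])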


class PreciseBN(HookBase):
    """
    The standard implementation of BatchNorm uses EMA in inference, which is
    sometimes suboptimal.
    This class computes the true average of statistics rather than the moving average,
    and puts the true averages into every BN layer in the given model.
    It is executed after the last iteration.
    """

    def __init__(self, model, data_loader, num_iter):
        """
        Args:
            model (nn.Module): a module whose BN layers in training mode will be
                updated by precise BN.
                Note that the user is responsible for ensuring that the BN layers to be
                updated are in training mode when this hook is triggered.
            data_loader (iterable): it will produce data to be run by `model(data)`.
            num_iter (int): number of iterations used to compute the precise
                statistics.
        """
        self._logger = logging.getLogger(__name__)
        if len(get_bn_modules(model)) == 0:
            self._logger.info(
                "PreciseBN is disabled because model does not contain BN layers in training mode."
            )
            self._disabled = True
            return

        self._model = model
        self._data_loader = data_loader
        self._num_iter = num_iter
        self._disabled = False

        self._data_iter = None

    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final:
            self.update_stats()

    def update_stats(self):
        """
        Update the model with precise statistics. Users can manually call this method.
        """
        if self._disabled:
            return

        if self._data_iter is None:
            self._data_iter = iter(self._data_loader)

        def data_loader():
            for num_iter in itertools.count(1):
                if num_iter % 100 == 0:
                    self._logger.info(
                        "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter)
                    )
                # This way we can reuse the same iterator
                yield next(self._data_iter)

        with EventStorage():  # capture events in a new storage to discard them
            self._logger.info(
                "Running precise-BN for {} iterations... ".format(self._num_iter)
                + "Note that this could produce different statistics every time."
            )
            update_bn_stats(self._model, data_loader(), self._num_iter)
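
# Usage sketch (illustration only): the hook only runs after the final iteration, so it is
# registered like any other hook; `train_loader` is an assumed iterable whose batches can
# be consumed by `model(data)`, and num_iter is an illustrative value.
#
#   trainer.register_hooks([PreciseBN(model, train_loader, num_iter=200)])
#   # or trigger the recomputation manually at any point:
#   # PreciseBN(model, train_loader, num_iter=200).update_stats()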


class FreezeLayer(HookBase):
    def __init__(self, model, optimizer, freeze_layers, freeze_iters):
        self._logger = logging.getLogger(__name__)

        if isinstance(model, DistributedDataParallel):
            model = model.module
        self.model = model
        self.optimizer = optimizer

        self.freeze_layers = freeze_layers
        self.freeze_iters = freeze_iters

        # Previous freeze status of the parameters
        param_freeze = {}
        for param_group in self.optimizer.param_groups:
            param_name = param_group['name']
            param_freeze[param_name] = param_group['freeze']
        self.param_freeze = param_freeze

        self.is_frozen = False

    def before_step(self):
        # Freeze the specified layers
        if self.trainer.iter <= self.freeze_iters and not self.is_frozen:
            self.freeze_specific_layer()

        # Recover the original layer status
        if self.trainer.iter > self.freeze_iters and self.is_frozen:
            self.open_all_layer()

    def freeze_specific_layer(self):
        for layer in self.freeze_layers:
            if not hasattr(self.model, layer):
                self._logger.info(f'{layer} is not an attribute of the model, will skip this layer')

        for param_group in self.optimizer.param_groups:
            param_name = param_group['name']
            if param_name.split('.')[0] in self.freeze_layers:
                param_group['freeze'] = True

        # Change BN in frozen layers to eval mode
        for name, module in self.model.named_children():
            if name in self.freeze_layers:
                module.eval()

        self.is_frozen = True

    def open_all_layer(self):
        self.model.train()
        for param_group in self.optimizer.param_groups:
            param_name = param_group['name']
            param_group['freeze'] = self.param_freeze[param_name]

        self.is_frozen = False
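
# Usage sketch (illustration only): FreezeLayer relies on every optimizer param group
# carrying custom 'name' and 'freeze' keys; the first dotted component of 'name' is matched
# against freeze_layers. The per-parameter group layout below shows the assumed convention,
# which is not enforced by this file.
#
#   param_groups = [
#       {"params": [p], "name": name, "freeze": False}
#       for name, p in model.named_parameters()
#   ]
#   optimizer = torch.optim.SGD(param_groups, lr=0.01)
#   trainer.register_hooks([
#       FreezeLayer(model, optimizer, freeze_layers=["backbone"], freeze_iters=1000)
#   ])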


class SWA(HookBase):
    def __init__(self, swa_start: int, swa_freq: int, swa_lr_factor: float, eta_min: float, lr_sched=False):
        self.swa_start = swa_start
        self.swa_freq = swa_freq
        self.swa_lr_factor = swa_lr_factor
        self.eta_min = eta_min
        self.lr_sched = lr_sched

    def before_step(self):
        is_swa = self.trainer.iter == self.swa_start
        if is_swa:
            # Wrap the optimizer with SWA
            self.trainer.optimizer = optim.SWA(self.trainer.optimizer, self.swa_freq, self.swa_lr_factor)
            self.trainer.optimizer.reset_lr_to_swa()

            if self.lr_sched:
                self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
                    optimizer=self.trainer.optimizer,
                    T_0=self.swa_freq,
                    eta_min=self.eta_min,
                )

    def after_step(self):
        next_iter = self.trainer.iter + 1

        # Use the cyclic learning rate scheduler
        if next_iter > self.swa_start and self.lr_sched:
            self.scheduler.step()

        is_final = next_iter == self.trainer.max_iter
        if is_final:
            self.trainer.optimizer.swap_swa_param()
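
# Usage sketch (illustration only): at iteration `swa_start` the hook replaces the trainer's
# optimizer with the SWA wrapper from fastreid.solver.optim, which is assumed to average the
# weights every `swa_freq` iterations; the averaged weights are swapped in after the final
# iteration. The numbers below are illustrative only.
#
#   trainer.register_hooks([
#       SWA(swa_start=9000, swa_freq=50, swa_lr_factor=10, eta_min=1e-7, lr_sched=True)
#   ])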