#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Meters."""

from collections import deque

import numpy as np
import pycls.core.logging as logging
import torch
from pycls.core.config import cfg
from pycls.core.timer import Timer


logger = logging.get_logger(__name__)

def time_string(seconds):
    """Converts time in seconds to a fixed-width string format (dd,hh:mm:ss)."""
    days, rem = divmod(int(seconds), 24 * 3600)
    hrs, rem = divmod(rem, 3600)
    mins, secs = divmod(rem, 60)
    return "{0:02},{1:02}:{2:02}:{3:02}".format(days, hrs, mins, secs)

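# A quick sanity check of the format (hypothetical input): 90,061 seconds is
# 1 day + 1 hour + 1 minute + 1 second, so
#
#   time_string(90061) == "01,01:01:01"
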
def inter_union(preds, labels, num_classes):
    """Computes per-class intersection and union areas between predictions and labels."""
    _, preds = torch.max(preds, 1)
    # Shift class ids to [1, num_classes]; an ignore label (e.g. 255) wraps to 0 under uint8
    preds = preds.type(torch.uint8) + 1
    labels = labels.type(torch.uint8) + 1
    # Zero out predictions wherever the label is ignored
    preds = preds * (labels > 0).type(torch.uint8)

    # Intersection: locations where the prediction matches the label
    inter = preds * (preds == labels).type(torch.uint8)
    area_inter = torch.histc(inter.type(torch.int64), bins=num_classes, min=1, max=num_classes)
    area_preds = torch.histc(preds.type(torch.int64), bins=num_classes, min=1, max=num_classes)
    area_labels = torch.histc(labels.type(torch.int64), bins=num_classes, min=1, max=num_classes)
    area_union = area_preds + area_labels - area_inter

    # Normalize by the batch size so areas are comparable across minibatches
    return [area_inter.type(torch.float64) / labels.size(0), area_union.type(torch.float64) / labels.size(0)]

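# A minimal sketch of how inter_union() is typically consumed (shapes and
# tensor names are hypothetical): per-batch areas are accumulated and then
# reduced to a mean IoU, as the IoU meters below do.
#
#   preds = torch.randn(8, 19, 64, 64)          # (N, num_classes, H, W) logits
#   labels = torch.randint(0, 19, (8, 64, 64))  # (N, H, W) class ids
#   inter, union = inter_union(preds, labels, 19)
#   miou = (inter / (union + 1e-10)).mean()
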
def topk_errors(preds, labels, ks):
    """Computes the top-k error for each k."""
    err_str = "Batch dim of predictions and labels must match"
    assert preds.size(0) == labels.size(0), err_str
    # Find the top max_k predictions for each sample
    _top_max_k_vals, top_max_k_inds = torch.topk(
        preds, max(ks), dim=1, largest=True, sorted=True
    )
    # (batch_size, max_k) -> (max_k, batch_size)
    top_max_k_inds = top_max_k_inds.t()
    # (batch_size, ) -> (max_k, batch_size)
    rep_max_k_labels = labels.view(1, -1).expand_as(top_max_k_inds)
    # (i, j) = 1 if top i-th prediction for the j-th sample is correct
    top_max_k_correct = top_max_k_inds.eq(rep_max_k_labels)
    # Count the top-k correct predictions for each k
    # (reshape, not view, since the transposed tensor is non-contiguous)
    topks_correct = [top_max_k_correct[:k, :].reshape(-1).float().sum() for k in ks]
    # Convert correct counts to error percentages
    return [(1.0 - x / preds.size(0)) * 100.0 for x in topks_correct]

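# A minimal usage sketch (hypothetical values): for a batch of logits over 10
# classes, topk_errors returns one error per requested k, each a 0-dim tensor
# holding a percentage in [0, 100].
#
#   preds = torch.randn(4, 10)           # (batch_size, num_classes)
#   labels = torch.tensor([3, 1, 0, 7])
#   top1_err, top5_err = topk_errors(preds, labels, [1, 5])
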
def gpu_mem_usage():
    """Computes the peak GPU memory usage for the current device (MB)."""
    mem_usage_bytes = torch.cuda.max_memory_allocated()
    return mem_usage_bytes / 1024 / 1024

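# Note: torch.cuda.max_memory_allocated() reports the peak since the start of
# the program (or since the last torch.cuda.reset_peak_memory_stats()), so the
# "mem" stat logged by the meters below is a high-water mark, not a current
# reading.
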
class ScalarMeter(object):
    """Measures a scalar value (adapted from Detectron)."""

    def __init__(self, window_size):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def reset(self):
        self.deque.clear()
        self.total = 0.0
        self.count = 0

    def add_value(self, value):
        self.deque.append(value)
        self.count += 1
        self.total += value

    def get_win_median(self):
        return np.median(self.deque)

    def get_win_avg(self):
        return np.mean(self.deque)

    def get_global_avg(self):
        return self.total / self.count

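# A minimal sketch of the meter's two views (hypothetical values): the window
# statistics cover only the last `window_size` values, while the global
# average covers everything added since the last reset().
#
#   m = ScalarMeter(window_size=3)
#   for v in [1.0, 2.0, 3.0, 4.0]:
#       m.add_value(v)
#   m.get_win_median()   # 3.0 -> median over the window [2.0, 3.0, 4.0]
#   m.get_global_avg()   # 2.5 -> mean over all four values
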
class TrainMeter(object):
    """Measures training stats."""

    def __init__(self, epoch_iters):
        self.epoch_iters = epoch_iters
        self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Current minibatch errors (smoothed over a window)
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Number of misclassified examples (scaled by 100, since errors arrive as percentages)
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self, timer=False):
        if timer:
            self.iter_timer.reset()
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
        # Current minibatch stats
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.loss.add_value(loss)
        self.lr = lr
        # Aggregate stats, weighted by minibatch size
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "eta": time_string(eta_sec),
            "top1_err": self.mb_top1_err.get_win_median(),
            "top5_err": self.mb_top5_err.get_win_median(),
            "loss": self.loss.get_win_median(),
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "train_iter"))

    def get_epoch_stats(self, cur_epoch):
        cur_iter_total = (cur_epoch + 1) * self.epoch_iters
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        top1_err = self.num_top1_mis / self.num_samples
        top5_err = self.num_top5_mis / self.num_samples
        avg_loss = self.loss_total / self.num_samples
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "eta": time_string(eta_sec),
            "top1_err": top1_err,
            "top5_err": top5_err,
            "loss": avg_loss,
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "train_epoch"))

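# A minimal sketch of the intended training loop (the loader and per-step
# values are hypothetical names, not part of this module):
#
#   meter = TrainMeter(epoch_iters=len(train_loader))
#   for cur_epoch in range(cfg.OPTIM.MAX_EPOCH):
#       meter.reset()
#       for cur_iter, (inputs, labels) in enumerate(train_loader):
#           meter.iter_tic()
#           ...  # forward/backward step yielding top1_err, top5_err, loss, lr
#           meter.iter_toc()
#           meter.update_stats(top1_err, top5_err, loss, lr, inputs.size(0))
#           meter.log_iter_stats(cur_epoch, cur_iter)
#       meter.log_epoch_stats(cur_epoch)
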
class TestMeter(object):
    """Measures testing stats."""

    def __init__(self, max_iter):
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Current minibatch errors (smoothed over a window)
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Min errors (over the full test set)
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Number of misclassified examples (scaled by 100, since errors arrive as percentages)
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self, min_errs=False):
        if min_errs:
            self.min_top1_err = 100.0
            self.min_top5_err = 100.0
        self.iter_timer.reset()
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, mb_size):
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        mem_usage = gpu_mem_usage()
        iter_stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "top1_err": self.mb_top1_err.get_win_median(),
            "top5_err": self.mb_top5_err.get_win_median(),
            "mem": int(np.ceil(mem_usage)),
        }
        return iter_stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "test_iter"))

    def get_epoch_stats(self, cur_epoch):
        top1_err = self.num_top1_mis / self.num_samples
        top5_err = self.num_top5_mis / self.num_samples
        self.min_top1_err = min(self.min_top1_err, top1_err)
        self.min_top5_err = min(self.min_top5_err, top5_err)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "top1_err": top1_err,
            "top5_err": top5_err,
            "min_top1_err": self.min_top1_err,
            "min_top5_err": self.min_top5_err,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "test_epoch"))

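# A minimal evaluation-loop sketch run once per epoch (test_loader and the
# per-batch errors are hypothetical names). Note that reset() keeps
# min_top1_err/min_top5_err across epochs unless min_errs=True, so the epoch
# log reports the best errors seen so far.
#
#   meter = TestMeter(max_iter=len(test_loader))
#   meter.reset()
#   for cur_iter, (inputs, labels) in enumerate(test_loader):
#       meter.iter_tic()
#       ...  # forward pass yielding top1_err and top5_err for this batch
#       meter.iter_toc()
#       meter.update_stats(top1_err, top5_err, inputs.size(0))
#       meter.log_iter_stats(cur_epoch, cur_iter)
#   meter.log_epoch_stats(cur_epoch)
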
class TrainMeterIoU(object):
    """Measures training stats (mean IoU variant for segmentation)."""

    def __init__(self, epoch_iters):
        self.epoch_iters = epoch_iters
        self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Current minibatch mIoU (smoothed over a window)
        self.mb_miou = ScalarMeter(cfg.LOG_PERIOD)
        # Per-class intersection/union areas accumulated over the epoch
        self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_samples = 0

    def reset(self, timer=False):
        if timer:
            self.iter_timer.reset()
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.mb_miou.reset()
        self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(self, inter, union, loss, lr, mb_size):
        # Current minibatch stats
        self.mb_miou.add_value((inter / (union + 1e-10)).mean())
        self.loss.add_value(loss)
        self.lr = lr
        # Aggregate stats, weighted by minibatch size
        self.num_inter += inter * mb_size
        self.num_union += union * mb_size
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "eta": time_string(eta_sec),
            "miou": self.mb_miou.get_win_median(),
            "loss": self.loss.get_win_median(),
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "train_iter"))

    def get_epoch_stats(self, cur_epoch):
        cur_iter_total = (cur_epoch + 1) * self.epoch_iters
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        miou = (self.num_inter / (self.num_union + 1e-10)).mean()
        avg_loss = self.loss_total / self.num_samples
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "eta": time_string(eta_sec),
            "miou": miou,
            "loss": avg_loss,
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "train_epoch"))

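# TrainMeterIoU above (and TestMeterIoU below) mirror the classification
# meters but consume the per-class areas from inter_union(). A minimal sketch
# of one training step (tensor names are hypothetical; the .cpu().numpy()
# conversion is an assumption so the areas accumulate into the np.zeros
# buffers):
#
#   inter, union = inter_union(logits, labels, cfg.MODEL.NUM_CLASSES)
#   inter, union = inter.cpu().numpy(), union.cpu().numpy()
#   meter.update_stats(inter, union, loss, lr, logits.size(0))
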
class TestMeterIoU(object):
    """Measures testing stats (mean IoU variant for segmentation)."""

    def __init__(self, max_iter):
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Current minibatch mIoU (smoothed over a window)
        self.mb_miou = ScalarMeter(cfg.LOG_PERIOD)
        # Max mIoU (over the full test set)
        self.max_miou = 0.0
        # Per-class intersection/union areas accumulated over the epoch
        self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_samples = 0

    def reset(self, min_errs=False):
        # The flag name mirrors TestMeter.reset(); here it resets the best mIoU
        if min_errs:
            self.max_miou = 0.0
        self.iter_timer.reset()
        self.mb_miou.reset()
        self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(self, inter, union, mb_size):
        self.mb_miou.add_value((inter / (union + 1e-10)).mean())
        self.num_inter += inter * mb_size
        self.num_union += union * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        mem_usage = gpu_mem_usage()
        iter_stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "miou": self.mb_miou.get_win_median(),
            "mem": int(np.ceil(mem_usage)),
        }
        return iter_stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "test_iter"))

    def get_epoch_stats(self, cur_epoch):
        miou = (self.num_inter / (self.num_union + 1e-10)).mean()
        self.max_miou = max(self.max_miou, miou)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "miou": miou,
            "max_miou": self.max_miou,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "test_epoch"))