205 lines
5.9 KiB
Python
205 lines
5.9 KiB
Python
#####################################################
|
|
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
|
|
#####################################################
|
|
import os, sys, time, torch
|
|
import torch.nn.functional as F
|
|
|
|
# modules in AutoDL
|
|
from xautodl.log_utils import AverageMeter, time_string
|
|
from .eval_funcs import obtain_accuracy
|
|
|
|
|
|
def simple_KD_train(
    xloader,
    teacher,
    network,
    criterion,
    scheduler,
    optimizer,
    optim_config,
    extra_info,
    print_freq,
    logger,
):
    """Run one knowledge-distillation training pass over ``xloader``.

    Thin wrapper that forwards every argument to ``procedure`` with the mode
    fixed to ``"train"``; ``optim_config`` is passed as ``procedure``'s
    ``config`` argument.

    Returns:
        The ``(loss, acc1, acc5)`` triple produced by ``procedure``.
    """
    train_stats = procedure(
        xloader=xloader,
        teacher=teacher,
        network=network,
        criterion=criterion,
        scheduler=scheduler,
        optimizer=optimizer,
        mode="train",
        config=optim_config,
        extra_info=extra_info,
        print_freq=print_freq,
        logger=logger,
    )
    avg_loss, top1_acc, top5_acc = train_stats
    return avg_loss, top1_acc, top5_acc
|
|
|
|
|
|
def simple_KD_valid(
    xloader, teacher, network, criterion, optim_config, extra_info, print_freq, logger
):
    """Run one knowledge-distillation evaluation pass over ``xloader``.

    Calls ``procedure`` in ``"valid"`` mode inside ``torch.no_grad()``. No
    scheduler or optimizer is supplied (both are passed as ``None``);
    ``optim_config`` is passed as ``procedure``'s ``config`` argument.

    Returns:
        The ``(loss, acc1, acc5)`` triple produced by ``procedure``.
    """
    with torch.no_grad():
        valid_stats = procedure(
            xloader=xloader,
            teacher=teacher,
            network=network,
            criterion=criterion,
            scheduler=None,
            optimizer=None,
            mode="valid",
            config=optim_config,
            extra_info=extra_info,
            print_freq=print_freq,
            logger=logger,
        )
    avg_loss, top1_acc, top5_acc = valid_stats
    return avg_loss, top1_acc, top5_acc
|
|
|
|
|
|
def loss_KD_fn(
    criterion,
    student_logits,
    teacher_logits,
    studentFeatures,
    teacherFeatures,
    targets,
    alpha,
    temperature,
):
    """Blend the supervised loss with a temperature-softened distillation loss.

    The result is::

        criterion(student_logits, targets) * (1 - alpha)
        + KL(softmax(teacher / T) || softmax(student / T)) * alpha * T * T

    where both softmaxes are taken over ``dim=1`` and the KL divergence uses
    ``reduction="batchmean"``.

    Args:
        criterion: Callable computing the supervised loss from
            ``(student_logits, targets)``.
        student_logits: Logits produced by the student network.
        teacher_logits: Logits produced by the teacher network.
        studentFeatures: Accepted for interface compatibility; not used.
        teacherFeatures: Accepted for interface compatibility; not used.
        targets: Ground-truth targets handed to ``criterion``.
        alpha: Weight of the distillation term; the supervised term is
            weighted by ``1 - alpha``.
        temperature: Divisor applied to both sets of logits before softmax.

    Returns:
        The weighted sum of the supervised and distillation losses.
    """
    # Supervised part, down-weighted by (1 - alpha).
    supervised = criterion(student_logits, targets)
    hard_term = supervised * (1.0 - alpha)

    # F.kl_div expects log-probabilities as input and probabilities as target.
    teacher_probs = F.softmax(teacher_logits / temperature, dim=1)
    student_log_probs = F.log_softmax(student_logits / temperature, dim=1)
    divergence = F.kl_div(student_log_probs, teacher_probs, reduction="batchmean")

    # The distillation term is scaled by alpha and by temperature squared.
    soft_weight = alpha * temperature * temperature
    soft_term = divergence * soft_weight

    return hard_term + soft_term
|
|
|
|
|
|
def procedure(
    xloader,
    teacher,
    network,
    criterion,
    scheduler,
    optimizer,
    mode,
    config,
    extra_info,
    print_freq,
    logger,
):
    """Run one knowledge-distillation pass over ``xloader`` in train or valid mode.

    For every batch the student (``network``) and the frozen ``teacher`` are
    evaluated on the same inputs, the distillation loss is computed with
    ``loss_KD_fn`` and, in ``"train"`` mode, the student is updated. Running
    averages of loss and top-1/top-5 accuracy are tracked for the student, and
    top-1/top-5 accuracy for the teacher.

    Args:
        xloader: Sized iterable yielding ``(inputs, targets)`` batches.
        teacher: Callable returning ``(features, logits)``; always put in
            eval mode and run under ``torch.no_grad()``.
        network: Student callable returning ``(features, logits)`` where
            ``logits`` is either a single tensor or a two-item list
            ``[logits, logits_aux]``.
        criterion: Supervised loss callable taking ``(logits, targets)``.
        scheduler: Used in ``"train"`` mode via ``update(None, fraction)``;
            when not ``None`` its ``get_min_info()`` is appended to log lines.
        optimizer: Used only in ``"train"`` mode (``zero_grad`` / ``step``).
        mode: Either ``"train"`` or ``"valid"``.
        config: Object exposing ``KD_alpha`` and ``KD_temperature`` and,
            optionally, ``auxiliary`` (weight of the auxiliary-head loss).
        extra_info: Free-form tag included in the progress log lines.
        print_freq: Progress is logged every ``print_freq`` batches and on
            the final batch.
        logger: Object exposing ``log(str)``.

    Returns:
        ``(losses.avg, top1.avg, top5.avg)`` for the student.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"valid"``.
    """
    data_time, batch_time, losses, top1, top5 = (
        AverageMeter(),
        AverageMeter(),
        AverageMeter(),
        AverageMeter(),
        AverageMeter(),
    )
    Ttop1, Ttop5 = AverageMeter(), AverageMeter()
    if mode == "train":
        network.train()
    elif mode == "valid":
        network.eval()
    else:
        raise ValueError("The mode is not right : {:}".format(mode))
    teacher.eval()

    # ``config`` is dereferenced unconditionally (KD_alpha / KD_temperature),
    # so only the optional ``auxiliary`` attribute needs a fallback.
    auxiliary = getattr(config, "auxiliary", -1)
    logger.log(
        "[{:5s}] config :: auxiliary={:}, KD :: [alpha={:.2f}, temperature={:.2f}]".format(
            mode,
            auxiliary,
            config.KD_alpha,
            config.KD_temperature,
        )
    )
    end = time.time()
    for i, (inputs, targets) in enumerate(xloader):
        if mode == "train":
            # Tell the scheduler how far into the epoch this batch is.
            scheduler.update(None, 1.0 * i / len(xloader))
        # measure data loading time
        data_time.update(time.time() - end)
        # calculate prediction and loss
        # NOTE(review): only ``targets`` is moved to the GPU; ``inputs`` is
        # passed to the networks as-is -- presumably they handle device
        # placement themselves (e.g. a DataParallel wrapper). Confirm.
        targets = targets.cuda(non_blocking=True)

        if mode == "train":
            optimizer.zero_grad()

        student_f, logits = network(inputs)
        if isinstance(logits, list):
            assert len(logits) == 2, "logits must has {:} items instead of {:}".format(
                2, len(logits)
            )
            logits, logits_aux = logits
        else:
            logits, logits_aux = logits, None
        with torch.no_grad():
            teacher_f, teacher_logits = teacher(inputs)

        loss = loss_KD_fn(
            criterion,
            logits,
            teacher_logits,
            student_f,
            teacher_f,
            targets,
            config.KD_alpha,
            config.KD_temperature,
        )
        # Add the auxiliary-head loss only when the network actually returned
        # auxiliary logits; otherwise ``criterion(None, targets)`` would fail
        # whenever ``config.auxiliary > 0`` but a single logits tensor came back.
        if auxiliary > 0 and logits_aux is not None:
            loss_aux = criterion(logits_aux, targets)
            loss += auxiliary * loss_aux

        if mode == "train":
            loss.backward()
            optimizer.step()

        # record
        sprec1, sprec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(sprec1.item(), inputs.size(0))
        top5.update(sprec5.item(), inputs.size(0))
        # teacher
        tprec1, tprec5 = obtain_accuracy(teacher_logits.data, targets.data, topk=(1, 5))
        Ttop1.update(tprec1.item(), inputs.size(0))
        Ttop5.update(tprec5.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or (i + 1) == len(xloader):
            Sstr = (
                " {:5s} ".format(mode.upper())
                + time_string()
                + " [{:}][{:03d}/{:03d}]".format(extra_info, i, len(xloader))
            )
            if scheduler is not None:
                Sstr += " {:}".format(scheduler.get_min_info())
            Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
                batch_time=batch_time, data_time=data_time
            )
            Lstr = "Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
                loss=losses, top1=top1, top5=top5
            )
            Lstr += " Teacher : acc@1={:.2f}, acc@5={:.2f}".format(Ttop1.avg, Ttop5.avg)
            Istr = "Size={:}".format(list(inputs.size()))
            logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Istr)

    # Epoch summary: teacher-minus-student accuracy gap, then student totals.
    logger.log(
        " **{:5s}** accuracy drop :: @1={:.2f}, @5={:.2f}".format(
            mode.upper(), Ttop1.avg - top1.avg, Ttop5.avg - top5.avg
        )
    )
    logger.log(
        " **{mode:5s}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}".format(
            mode=mode.upper(),
            top1=top1,
            top5=top5,
            error1=100 - top1.avg,
            error5=100 - top5.avg,
            loss=losses.avg,
        )
    )
    return losses.avg, top1.avg, top5.avg
|