205 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			205 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #####################################################
 | |
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
 | |
| #####################################################
 | |
| import os, sys, time, torch
 | |
| import torch.nn.functional as F
 | |
| 
 | |
| # our modules
 | |
| from log_utils import AverageMeter, time_string
 | |
| from utils import obtain_accuracy
 | |
| 
 | |
| 
 | |
| def simple_KD_train(
 | |
|     xloader,
 | |
|     teacher,
 | |
|     network,
 | |
|     criterion,
 | |
|     scheduler,
 | |
|     optimizer,
 | |
|     optim_config,
 | |
|     extra_info,
 | |
|     print_freq,
 | |
|     logger,
 | |
| ):
 | |
|     loss, acc1, acc5 = procedure(
 | |
|         xloader,
 | |
|         teacher,
 | |
|         network,
 | |
|         criterion,
 | |
|         scheduler,
 | |
|         optimizer,
 | |
|         "train",
 | |
|         optim_config,
 | |
|         extra_info,
 | |
|         print_freq,
 | |
|         logger,
 | |
|     )
 | |
|     return loss, acc1, acc5
 | |
| 
 | |
| 
 | |
| def simple_KD_valid(
 | |
|     xloader, teacher, network, criterion, optim_config, extra_info, print_freq, logger
 | |
| ):
 | |
|     with torch.no_grad():
 | |
|         loss, acc1, acc5 = procedure(
 | |
|             xloader,
 | |
|             teacher,
 | |
|             network,
 | |
|             criterion,
 | |
|             None,
 | |
|             None,
 | |
|             "valid",
 | |
|             optim_config,
 | |
|             extra_info,
 | |
|             print_freq,
 | |
|             logger,
 | |
|         )
 | |
|     return loss, acc1, acc5
 | |
| 
 | |
| 
 | |
| def loss_KD_fn(
 | |
|     criterion,
 | |
|     student_logits,
 | |
|     teacher_logits,
 | |
|     studentFeatures,
 | |
|     teacherFeatures,
 | |
|     targets,
 | |
|     alpha,
 | |
|     temperature,
 | |
| ):
 | |
|     basic_loss = criterion(student_logits, targets) * (1.0 - alpha)
 | |
|     log_student = F.log_softmax(student_logits / temperature, dim=1)
 | |
|     sof_teacher = F.softmax(teacher_logits / temperature, dim=1)
 | |
|     KD_loss = F.kl_div(log_student, sof_teacher, reduction="batchmean") * (
 | |
|         alpha * temperature * temperature
 | |
|     )
 | |
|     return basic_loss + KD_loss
 | |
| 
 | |
| 
 | |
| def procedure(
 | |
|     xloader,
 | |
|     teacher,
 | |
|     network,
 | |
|     criterion,
 | |
|     scheduler,
 | |
|     optimizer,
 | |
|     mode,
 | |
|     config,
 | |
|     extra_info,
 | |
|     print_freq,
 | |
|     logger,
 | |
| ):
 | |
|     data_time, batch_time, losses, top1, top5 = (
 | |
|         AverageMeter(),
 | |
|         AverageMeter(),
 | |
|         AverageMeter(),
 | |
|         AverageMeter(),
 | |
|         AverageMeter(),
 | |
|     )
 | |
|     Ttop1, Ttop5 = AverageMeter(), AverageMeter()
 | |
|     if mode == "train":
 | |
|         network.train()
 | |
|     elif mode == "valid":
 | |
|         network.eval()
 | |
|     else:
 | |
|         raise ValueError("The mode is not right : {:}".format(mode))
 | |
|     teacher.eval()
 | |
| 
 | |
|     logger.log(
 | |
|         "[{:5s}] config :: auxiliary={:}, KD :: [alpha={:.2f}, temperature={:.2f}]".format(
 | |
|             mode,
 | |
|             config.auxiliary if hasattr(config, "auxiliary") else -1,
 | |
|             config.KD_alpha,
 | |
|             config.KD_temperature,
 | |
|         )
 | |
|     )
 | |
|     end = time.time()
 | |
|     for i, (inputs, targets) in enumerate(xloader):
 | |
|         if mode == "train":
 | |
|             scheduler.update(None, 1.0 * i / len(xloader))
 | |
|         # measure data loading time
 | |
|         data_time.update(time.time() - end)
 | |
|         # calculate prediction and loss
 | |
|         targets = targets.cuda(non_blocking=True)
 | |
| 
 | |
|         if mode == "train":
 | |
|             optimizer.zero_grad()
 | |
| 
 | |
|         student_f, logits = network(inputs)
 | |
|         if isinstance(logits, list):
 | |
|             assert len(logits) == 2, "logits must has {:} items instead of {:}".format(
 | |
|                 2, len(logits)
 | |
|             )
 | |
|             logits, logits_aux = logits
 | |
|         else:
 | |
|             logits, logits_aux = logits, None
 | |
|         with torch.no_grad():
 | |
|             teacher_f, teacher_logits = teacher(inputs)
 | |
| 
 | |
|         loss = loss_KD_fn(
 | |
|             criterion,
 | |
|             logits,
 | |
|             teacher_logits,
 | |
|             student_f,
 | |
|             teacher_f,
 | |
|             targets,
 | |
|             config.KD_alpha,
 | |
|             config.KD_temperature,
 | |
|         )
 | |
|         if config is not None and hasattr(config, "auxiliary") and config.auxiliary > 0:
 | |
|             loss_aux = criterion(logits_aux, targets)
 | |
|             loss += config.auxiliary * loss_aux
 | |
| 
 | |
|         if mode == "train":
 | |
|             loss.backward()
 | |
|             optimizer.step()
 | |
| 
 | |
|         # record
 | |
|         sprec1, sprec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
 | |
|         losses.update(loss.item(), inputs.size(0))
 | |
|         top1.update(sprec1.item(), inputs.size(0))
 | |
|         top5.update(sprec5.item(), inputs.size(0))
 | |
|         # teacher
 | |
|         tprec1, tprec5 = obtain_accuracy(teacher_logits.data, targets.data, topk=(1, 5))
 | |
|         Ttop1.update(tprec1.item(), inputs.size(0))
 | |
|         Ttop5.update(tprec5.item(), inputs.size(0))
 | |
| 
 | |
|         # measure elapsed time
 | |
|         batch_time.update(time.time() - end)
 | |
|         end = time.time()
 | |
| 
 | |
|         if i % print_freq == 0 or (i + 1) == len(xloader):
 | |
|             Sstr = (
 | |
|                 " {:5s} ".format(mode.upper())
 | |
|                 + time_string()
 | |
|                 + " [{:}][{:03d}/{:03d}]".format(extra_info, i, len(xloader))
 | |
|             )
 | |
|             if scheduler is not None:
 | |
|                 Sstr += " {:}".format(scheduler.get_min_info())
 | |
|             Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
 | |
|                 batch_time=batch_time, data_time=data_time
 | |
|             )
 | |
|             Lstr = "Loss {loss.val:.3f} ({loss.avg:.3f})  Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
 | |
|                 loss=losses, top1=top1, top5=top5
 | |
|             )
 | |
|             Lstr += " Teacher : acc@1={:.2f}, acc@5={:.2f}".format(Ttop1.avg, Ttop5.avg)
 | |
|             Istr = "Size={:}".format(list(inputs.size()))
 | |
|             logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Istr)
 | |
| 
 | |
|     logger.log(
 | |
|         " **{:5s}** accuracy drop :: @1={:.2f}, @5={:.2f}".format(
 | |
|             mode.upper(), Ttop1.avg - top1.avg, Ttop5.avg - top5.avg
 | |
|         )
 | |
|     )
 | |
|     logger.log(
 | |
|         " **{mode:5s}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}".format(
 | |
|             mode=mode.upper(),
 | |
|             top1=top1,
 | |
|             top5=top5,
 | |
|             error1=100 - top1.avg,
 | |
|             error5=100 - top5.avg,
 | |
|             loss=losses.avg,
 | |
|         )
 | |
|     )
 | |
|     return losses.avg, top1.avg, top5.avg
 |