190 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			190 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os, sys, numpy as np, argparse
 | |
| from pathlib import Path
 | |
| import paddle.fluid as fluid
 | |
| import math, time, paddle
 | |
| import paddle.fluid.layers.ops as ops
 | |
| #from tb_paddle import SummaryWriter
 | |
| 
 | |
| lib_dir = (Path(__file__).parent / 'lib').resolve()
 | |
| if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
 | |
| from models import resnet_cifar, NASCifarNet, Networks
 | |
| from utils  import AverageMeter, time_for_file, time_string, convert_secs2time
 | |
| from utils  import reader_creator
 | |
| 
 | |
| 
 | |
def inference_program(model_name, num_class):
  """Build the forward (inference) graph for the requested architecture.

  Args:
    model_name: one of 'ResNet20' / 'ResNet32' / 'ResNet110', or a key of
      the project's `Networks` table (a NAS-discovered cell).
    num_class: number of output classes.

  Returns:
    The prediction variable(s) produced by the chosen network; NASCifarNet
    may return a (main, auxiliary) pair.
  """
  # CIFAR images are 32x32 with 3 RGB channels.
  images = fluid.layers.data(name='pixel', shape=[3, 32, 32], dtype='float32')

  # Depth table for the plain pre-defined ResNets.
  resnet_depths = {'ResNet20': 20, 'ResNet32': 32, 'ResNet110': 110}
  if model_name in resnet_depths:
    return resnet_cifar(images, resnet_depths[model_name], num_class)
  # Anything else is treated as a NAS cell name looked up in `Networks`.
  return NASCifarNet(images, 36, 6, 3, num_class, Networks[model_name], True)
 | |
| 
 | |
| 
 | |
def train_program(predict):
  """Attach the training loss and accuracy to the inference graph.

  Args:
    predict: a single prediction variable, or a (main, auxiliary) pair as
      produced by NASCifarNet — the auxiliary head is weighted by 0.4.

  Returns:
    [loss, accuracy] variables; accuracy is always computed on the main head.
  """
  label = fluid.layers.data(name='label', shape=[1], dtype='int64')
  if isinstance(predict, (list, tuple)):
    main_out, aux_out = predict
    main_loss = fluid.layers.mean(fluid.layers.cross_entropy(input=main_out, label=label))
    aux_loss  = fluid.layers.mean(fluid.layers.cross_entropy(input=aux_out, label=label))
    # Standard auxiliary-tower weighting (as in NASNet-style training).
    loss = main_loss + aux_loss * 0.4
    accuracy = fluid.layers.accuracy(input=main_out, label=label)
  else:
    loss = fluid.layers.mean(fluid.layers.cross_entropy(input=predict, label=label))
    accuracy = fluid.layers.accuracy(input=predict, label=label)
  return [loss, accuracy]
 | |
| 
 | |
| 
 | |
| # For training test cost
 | |
def evaluation(program, reader, fetch_list, place):
  """Run one full pass of `reader` through `program`.

  Args:
    program: the (test-mode) fluid Program to execute.
    reader: a batched data reader yielding (image, label) samples.
    fetch_list: variables to fetch — expected to be [loss, accuracy].
    place: the fluid device placement.

  Returns:
    (average loss, average accuracy in percent) over all samples.
  """
  block = program.global_block()
  feeder = fluid.DataFeeder(feed_list=[block.var('pixel'), block.var('label')], place=place)
  executor = fluid.Executor(place)
  loss_meter, acc_meter = AverageMeter(), AverageMeter()
  for batch in reader():
    batch_loss, batch_acc = executor.run(
        program=program, feed=feeder.feed(batch), fetch_list=fetch_list)
    count = len(batch)
    loss_meter.update(float(batch_loss), count)
    acc_meter.update(float(batch_acc) * 100, count)
  return loss_meter.avg, acc_meter.avg
 | |
| 
 | |
| 
 | |
def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
  """Build a per-mini-batch cosine-decay LR schedule with linear warmup.

  For the first 5 epochs (hard-coded below) the LR ramps linearly from 0 to
  `learning_rate`; afterwards it follows a half-cosine:
    lr = learning_rate * (cos((step - warmup_steps) * pi / (epochs * step_each_epoch)) + 1) / 2

  Args:
    learning_rate: peak learning rate reached at the end of warmup.
    step_each_epoch: number of mini-batches per epoch.
    epochs: total training epochs (sets the cosine period).

  Returns:
    A persistable scalar variable holding the current learning rate; it is
    re-assigned on every step via the Switch construct below.
  """
  from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
  from paddle.fluid.initializer import init_on_cpu
  # Global mini-batch counter maintained by fluid; increments each run.
  global_step = _decay_step_counter()
  # Persistable output variable the optimizer reads as its LR.
  lr = fluid.layers.tensor.create_global_var(
      shape=[1],
      value=0.0,
      dtype='float32',
      persistable=True,
      name="learning_rate")

  # Warmup length in epochs, fixed at 5 (not exposed as a parameter).
  warmup_epoch = fluid.layers.fill_constant(
      shape=[1], dtype='float32', value=float(5), force_cpu=True)

  with init_on_cpu():
    # Current (float) epoch index derived from the step counter.
    epoch = ops.floor(global_step / step_each_epoch)
    with fluid.layers.control_flow.Switch() as switch:
      with switch.case(epoch < warmup_epoch):
        # Linear warmup: fraction of total warmup steps completed.
        decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
        fluid.layers.tensor.assign(input=decayed_lr, output=lr)
      with switch.default():
        # NOTE(review): the cosine period uses the FULL epochs*step_each_epoch
        # horizon while the phase subtracts the warmup steps, so the LR does
        # not reach exactly 0 at the final step — presumably intentional;
        # confirm before changing.
        decayed_lr = learning_rate * \
          (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
        fluid.layers.tensor.assign(input=decayed_lr, output=lr)
  return lr
 | |
| 
 | |
| 
 | |
def main(xargs):
  """Train and evaluate a CIFAR classifier with PaddlePaddle fluid.

  Args:
    xargs: parsed argparse namespace providing log_dir, dataset, data_path,
      model_name, lr, batch_size, step_each_epoch and epochs.

  Raises:
    ValueError: if xargs.dataset is neither 'cifar-10' nor 'cifar-100'.

  Side effects: creates a time-stamped save directory under xargs.log_dir,
  trains on CUDA device 0, prints progress, and saves an inference model
  after every epoch.
  """
  save_dir = Path(xargs.log_dir) / time_for_file()
  save_dir.mkdir(parents=True, exist_ok=True)

  print ('save dir : {:}'.format(save_dir))
  print ('xargs : {:}'.format(xargs))

  # Build raw sample readers; reader_creator is a project helper (lib/utils).
  if xargs.dataset == 'cifar-10':
    train_data = reader_creator(xargs.data_path, 'data_batch', True , False)
    test__data = reader_creator(xargs.data_path, 'test_batch', False, False)
    class_num  = 10
    print ('create cifar-10  dataset')
  elif xargs.dataset == 'cifar-100':
    train_data = reader_creator(xargs.data_path, 'train', True , False)
    test__data = reader_creator(xargs.data_path, 'test' , False, False)
    class_num  = 100
    print ('create cifar-100 dataset')
  else:
    raise ValueError('invalid dataset : {:}'.format(xargs.dataset))

  train_reader = paddle.batch(
    paddle.reader.shuffle(train_data, buf_size=5000),
    batch_size=xargs.batch_size)

  # Reader for testing. A separated data set for testing.
  test_reader = paddle.batch(test__data, batch_size=xargs.batch_size)

  place = fluid.CUDAPlace(0)

  main_program = fluid.default_main_program()
  star_program = fluid.default_startup_program()

  # Build the forward graph, then attach loss/accuracy; clone a test-mode
  # program BEFORE adding the optimizer so evaluation runs inference-only ops.
  predict      = inference_program(xargs.model_name, class_num)
  [loss, accuracy] = train_program(predict)
  print ('training program setup done')
  test_program = main_program.clone(for_test=True)
  print ('testing  program setup done')

  # Cosine LR with 5-epoch linear warmup (see cosine_decay_with_warmup).
  learning_rate = cosine_decay_with_warmup(xargs.lr, xargs.step_each_epoch, xargs.epochs)
  optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(0.0005),
            use_nesterov=True)
  optimizer.minimize( loss )

  exe = fluid.Executor(place)

  feed_var_list_loop = [main_program.global_block().var('pixel'), main_program.global_block().var('label')]
  feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
  exe.run(star_program)

  start_time, epoch_time = time.time(), AverageMeter()
  for iepoch in range(xargs.epochs):
    losses, accuracies, steps = AverageMeter(), AverageMeter(), 0
    for step_id, train_data in enumerate(train_reader()):
      tloss, tacc, xlr = exe.run(main_program, feed=feeder.feed(train_data), fetch_list=[loss, accuracy, learning_rate])
      tloss, tacc, xlr = float(tloss), float(tacc) * 100, float(xlr)
      steps += 1
      losses.update(tloss, len(train_data))
      accuracies.update(tacc, len(train_data))
      if step_id % 100 == 0:
        print('{:} [{:03d}/{:03d}] [{:03d}] lr = {:.7f}, loss = {:.4f} ({:.4f}), accuracy = {:.2f} ({:.2f}), error={:.2f}'.format(time_string(), iepoch, xargs.epochs, step_id, xlr, tloss, losses.avg, tacc, accuracies.avg, 100-accuracies.avg))
    test_loss, test_acc = evaluation(test_program, test_reader, [loss, accuracy], place)
    need_time = 'Time Left: {:}'.format( convert_secs2time(epoch_time.avg * (xargs.epochs-iepoch), True) )
    print('{:}x[{:03d}/{:03d}] {:} train-loss = {:.4f}, train-accuracy = {:.2f}, test-loss = {:.4f}, test-accuracy = {:.2f} test-error = {:.2f} [{:} steps per epoch]\n'.format(time_string(), iepoch, xargs.epochs, need_time, losses.avg, accuracies.avg, test_loss, test_acc, 100-test_acc, steps))
    # BUG FIX: predict may be a tuple (train_program accepts list OR tuple for
    # the NASCifarNet main/aux pair), but the original check was
    # `isinstance(predict, list)` — a tuple fell into the else-branch and got
    # wrapped as [tuple], passing a non-variable to save_inference_model.
    if isinstance(predict, (list, tuple)):
      fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], list(predict), exe)
    else:
      fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], [predict], exe)
    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

  print('finish training and evaluation with {:} epochs in {:}'.format(xargs.epochs, convert_secs2time(epoch_time.sum, True)))
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
  parser = argparse.ArgumentParser(description='Train.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('--log_dir' ,       type=str,                   help='Save dir.')
  parser.add_argument('--dataset',        type=str,                   help='The dataset name.')
  parser.add_argument('--data_path',      type=str,                   help='The dataset path.')
  parser.add_argument('--model_name',     type=str,                   help='The model name.')
  parser.add_argument('--lr',             type=float,                 help='The learning rate.')
  parser.add_argument('--batch_size',     type=int,                   help='The batch size.')
  # BUG FIX: help text was 'The batch size.' — a copy-paste error from the
  # option above; this flag is the number of mini-batch steps per epoch.
  parser.add_argument('--step_each_epoch',type=int,                   help='The number of steps (mini-batches) per epoch.')
  parser.add_argument('--epochs'    ,     type=int,                   help='The total training epochs.')
  args = parser.parse_args()
  main(args)
 |