import os, sys, numpy as np, argparse from pathlib import Path import paddle.fluid as fluid import math, time, paddle import paddle.fluid.layers.ops as ops #from tb_paddle import SummaryWriter lib_dir = (Path(__file__).parent / 'lib').resolve() if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir)) from models import resnet_cifar, NASCifarNet, Networks from utils import AverageMeter, time_for_file, time_string, convert_secs2time from utils import reader_creator def inference_program(model_name, num_class): # The image is 32 * 32 with RGB representation. data_shape = [3, 32, 32] images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') if model_name == 'ResNet20': predict = resnet_cifar(images, 20, num_class) elif model_name == 'ResNet32': predict = resnet_cifar(images, 32, num_class) elif model_name == 'ResNet110': predict = resnet_cifar(images, 110, num_class) else: predict = NASCifarNet(images, 36, 6, 3, num_class, Networks[model_name], True) return predict def train_program(predict): label = fluid.layers.data(name='label', shape=[1], dtype='int64') if isinstance(predict, (list, tuple)): predict, aux_predict = predict x_losses = fluid.layers.cross_entropy(input=predict, label=label) aux_losses = fluid.layers.cross_entropy(input=aux_predict, label=label) x_loss = fluid.layers.mean(x_losses) aux_loss = fluid.layers.mean(aux_losses) loss = x_loss + aux_loss * 0.4 accuracy = fluid.layers.accuracy(input=predict, label=label) else: losses = fluid.layers.cross_entropy(input=predict, label=label) loss = fluid.layers.mean(losses) accuracy = fluid.layers.accuracy(input=predict, label=label) return [loss, accuracy] # For training test cost def evaluation(program, reader, fetch_list, place): feed_var_list = [program.global_block().var('pixel'), program.global_block().var('label')] feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place) test_exe = fluid.Executor(place) losses, accuracies = AverageMeter(), AverageMeter() for tid, test_data in enumerate(reader()): loss, acc = test_exe.run(program=program, feed=feeder_test.feed(test_data), fetch_list=fetch_list) losses.update(float(loss), len(test_data)) accuracies.update(float(acc)*100, len(test_data)) return losses.avg, accuracies.avg def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120): """Applies cosine decay to the learning rate. lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) decrease lr for every mini-batch and start with warmup. """ from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.initializer import init_on_cpu global_step = _decay_step_counter() lr = fluid.layers.tensor.create_global_var( shape=[1], value=0.0, dtype='float32', persistable=True, name="learning_rate") warmup_epoch = fluid.layers.fill_constant( shape=[1], dtype='float32', value=float(5), force_cpu=True) with init_on_cpu(): epoch = ops.floor(global_step / step_each_epoch) with fluid.layers.control_flow.Switch() as switch: with switch.case(epoch < warmup_epoch): decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch)) fluid.layers.tensor.assign(input=decayed_lr, output=lr) with switch.default(): decayed_lr = learning_rate * \ (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2 fluid.layers.tensor.assign(input=decayed_lr, output=lr) return lr def main(xargs): save_dir = Path(xargs.log_dir) / time_for_file() save_dir.mkdir(parents=True, exist_ok=True) print ('save dir : {:}'.format(save_dir)) print ('xargs : {:}'.format(xargs)) if xargs.dataset == 'cifar-10': train_data = reader_creator(xargs.data_path, 'data_batch', True , False) test__data = reader_creator(xargs.data_path, 'test_batch', False, False) class_num = 10 print ('create cifar-10 dataset') elif xargs.dataset == 'cifar-100': train_data = reader_creator(xargs.data_path, 'train', True , False) test__data = reader_creator(xargs.data_path, 'test' , False, False) class_num = 100 print ('create cifar-100 dataset') else: raise ValueError('invalid dataset : {:}'.format(xargs.dataset)) train_reader = paddle.batch( paddle.reader.shuffle(train_data, buf_size=5000), batch_size=xargs.batch_size) # Reader for testing. A separated data set for testing. test_reader = paddle.batch(test__data, batch_size=xargs.batch_size) place = fluid.CUDAPlace(0) main_program = fluid.default_main_program() star_program = fluid.default_startup_program() # programs predict = inference_program(xargs.model_name, class_num) [loss, accuracy] = train_program(predict) print ('training program setup done') test_program = main_program.clone(for_test=True) print ('testing program setup done') #infer_writer = SummaryWriter( str(save_dir / 'infer') ) #infer_writer.add_paddle_graph(fluid_program=fluid.default_main_program(), verbose=True) #infer_writer.close() #print(test_program.to_string(True)) #learning_rate = fluid.layers.cosine_decay(learning_rate=xargs.lr, step_each_epoch=xargs.step_each_epoch, epochs=xargs.epochs) #learning_rate = fluid.layers.cosine_decay(learning_rate=0.1, step_each_epoch=196, epochs=300) learning_rate = cosine_decay_with_warmup(xargs.lr, xargs.step_each_epoch, xargs.epochs) optimizer = fluid.optimizer.Momentum( learning_rate=learning_rate, momentum=0.9, regularization=fluid.regularizer.L2Decay(0.0005), use_nesterov=True) optimizer.minimize( loss ) exe = fluid.Executor(place) feed_var_list_loop = [main_program.global_block().var('pixel'), main_program.global_block().var('label')] feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place) exe.run(star_program) start_time, epoch_time = time.time(), AverageMeter() for iepoch in range(xargs.epochs): losses, accuracies, steps = AverageMeter(), AverageMeter(), 0 for step_id, train_data in enumerate(train_reader()): tloss, tacc, xlr = exe.run(main_program, feed=feeder.feed(train_data), fetch_list=[loss, accuracy, learning_rate]) tloss, tacc, xlr = float(tloss), float(tacc) * 100, float(xlr) steps += 1 losses.update(tloss, len(train_data)) accuracies.update(tacc, len(train_data)) if step_id % 100 == 0: print('{:} [{:03d}/{:03d}] [{:03d}] lr = {:.7f}, loss = {:.4f} ({:.4f}), accuracy = {:.2f} ({:.2f}), error={:.2f}'.format(time_string(), iepoch, xargs.epochs, step_id, xlr, tloss, losses.avg, tacc, accuracies.avg, 100-accuracies.avg)) test_loss, test_acc = evaluation(test_program, test_reader, [loss, accuracy], place) need_time = 'Time Left: {:}'.format( convert_secs2time(epoch_time.avg * (xargs.epochs-iepoch), True) ) print('{:}x[{:03d}/{:03d}] {:} train-loss = {:.4f}, train-accuracy = {:.2f}, test-loss = {:.4f}, test-accuracy = {:.2f} test-error = {:.2f} [{:} steps per epoch]\n'.format(time_string(), iepoch, xargs.epochs, need_time, losses.avg, accuracies.avg, test_loss, test_acc, 100-test_acc, steps)) if isinstance(predict, list): fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], predict, exe) else: fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], [predict], exe) # measure elapsed time epoch_time.update(time.time() - start_time) start_time = time.time() print('finish training and evaluation with {:} epochs in {:}'.format(xargs.epochs, convert_secs2time(epoch_time.sum, True))) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Train.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--log_dir' , type=str, help='Save dir.') parser.add_argument('--dataset', type=str, help='The dataset name.') parser.add_argument('--data_path', type=str, help='The dataset path.') parser.add_argument('--model_name', type=str, help='The model name.') parser.add_argument('--lr', type=float, help='The learning rate.') parser.add_argument('--batch_size', type=int, help='The batch size.') parser.add_argument('--step_each_epoch',type=int, help='The batch size.') parser.add_argument('--epochs' , type=int, help='The total training epochs.') args = parser.parse_args() main(args)