diff --git a/exps-tf/GDAS.py b/exps-tf/GDAS.py
deleted file mode 100644
index a77511e..0000000
--- a/exps-tf/GDAS.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# [D-X-Y]
-# Run GDAS
-# CUDA_VISIBLE_DEVICES=0 python exps-tf/GDAS.py
-# Run DARTS
-# CUDA_VISIBLE_DEVICES=0 python exps-tf/GDAS.py --tau_max -1 --tau_min -1 --epochs 50
-#
-import os, sys, math, time, random, argparse
-import tensorflow as tf
-from pathlib import Path
-
-lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
-if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
-
-# self-lib
-from tf_models import get_cell_based_tiny_net
-from tf_optimizers import SGDW, AdamW
-from config_utils import dict2config
-from log_utils import time_string
-from models import CellStructure
-
-
-def pre_process(image_a, label_a, image_b, label_b):
-  def standard_func(image):
-    x = tf.pad(image, [[4, 4], [4, 4], [0, 0]])
-    x = tf.image.random_crop(x, [32, 32, 3])
-    x = tf.image.random_flip_left_right(x)
-    return x
-  return standard_func(image_a), label_a, standard_func(image_b), label_b
-
-
-class CosineAnnealingLR(object):
-  def __init__(self, warmup_epochs, epochs, initial_lr, min_lr):
-    self.warmup_epochs = warmup_epochs
-    self.epochs = epochs
-    self.initial_lr = initial_lr
-    self.min_lr = min_lr
-
-  def get_lr(self, epoch):
-    if epoch < self.warmup_epochs:
-      lr = self.min_lr + (epoch/self.warmup_epochs) * (self.initial_lr-self.min_lr)
-    elif epoch >= self.epochs:
-      lr = self.min_lr
-    else:
-      lr = self.min_lr + (self.initial_lr-self.min_lr) * 0.5 * (1 + math.cos(math.pi * epoch / self.epochs))
-    return lr
-
-
-
-def main(xargs):
-  cifar10 = tf.keras.datasets.cifar10
-
-  (x_train, y_train), (x_test, y_test) = cifar10.load_data()
-  x_train, x_test = x_train / 255.0, x_test / 255.0
-  x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
-
-  # Add a channels dimension
-  all_indexes = list(range(x_train.shape[0]))
-  random.shuffle(all_indexes)
-  s_train_idxs, s_valid_idxs = all_indexes[::2], all_indexes[1::2]
-  search_train_x, search_train_y = x_train[s_train_idxs], y_train[s_train_idxs]
-  search_valid_x, search_valid_y = x_train[s_valid_idxs], y_train[s_valid_idxs]
-  #x_train, x_test = x_train[..., tf.newaxis], x_test[..., tf.newaxis]
-
-  # Use tf.data
-  #train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(64)
-  search_ds = tf.data.Dataset.from_tensor_slices((search_train_x, search_train_y, search_valid_x, search_valid_y))
-  search_ds = search_ds.map(pre_process).shuffle(1000).batch(64)
-
-  test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
-
-  # Create an instance of the model
-  config = dict2config({'name': 'GDAS',
-                        'C' : xargs.channel, 'N': xargs.num_cells, 'max_nodes': xargs.max_nodes,
-                        'num_classes': 10, 'space': 'nas-bench-201', 'affine': True}, None)
-  model = get_cell_based_tiny_net(config)
-  num_iters_per_epoch = int(tf.data.experimental.cardinality(search_ds).numpy())
-  #lr_schedular = tf.keras.experimental.CosineDecay(xargs.w_lr_max, num_iters_per_epoch*xargs.epochs, xargs.w_lr_min / xargs.w_lr_max)
-  lr_schedular = CosineAnnealingLR(0, xargs.epochs, xargs.w_lr_max, xargs.w_lr_min)
-  # Choose optimizer
-  loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
-  w_optimizer = SGDW(learning_rate=xargs.w_lr_max, weight_decay=xargs.w_weight_decay, momentum=xargs.w_momentum, nesterov=True)
-  a_optimizer = AdamW(learning_rate=xargs.arch_learning_rate, weight_decay=xargs.arch_weight_decay, beta_1=0.5, beta_2=0.999, epsilon=1e-07)
-  #w_optimizer = tf.keras.optimizers.SGD(learning_rate=0.025, momentum=0.9, nesterov=True)
-  #a_optimizer = tf.keras.optimizers.AdamW(learning_rate=xargs.arch_learning_rate, beta_1=0.5, beta_2=0.999, epsilon=1e-07)
-  ####
-  # metrics
-  train_loss = tf.keras.metrics.Mean(name='train_loss')
-  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
-  valid_loss = tf.keras.metrics.Mean(name='valid_loss')
-  valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')
-  test_loss = tf.keras.metrics.Mean(name='test_loss')
-  test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
-
-  @tf.function
-  def search_step(train_images, train_labels, valid_images, valid_labels, tf_tau):
-    # optimize weights
-    with tf.GradientTape() as tape:
-      predictions = model(train_images, tf_tau, True)
-      w_loss = loss_object(train_labels, predictions)
-    net_w_param = model.get_weights()
-    gradients = tape.gradient(w_loss, net_w_param)
-    w_optimizer.apply_gradients(zip(gradients, net_w_param))
-    train_loss(w_loss)
-    train_accuracy(train_labels, predictions)
-    # optimize alphas
-    with tf.GradientTape() as tape:
-      predictions = model(valid_images, tf_tau, True)
-      a_loss = loss_object(valid_labels, predictions)
-    net_a_param = model.get_alphas()
-    gradients = tape.gradient(a_loss, net_a_param)
-    a_optimizer.apply_gradients(zip(gradients, net_a_param))
-    valid_loss(a_loss)
-    valid_accuracy(valid_labels, predictions)
-
-  # TEST
-  @tf.function
-  def test_step(images, labels):
-    predictions = model(images)
-    t_loss = loss_object(labels, predictions)
-
-    test_loss(t_loss)
-    test_accuracy(labels, predictions)
-
-  print('{:} start searching with {:} epochs ({:} batches per epoch).'.format(time_string(), xargs.epochs, num_iters_per_epoch))
-
-  for epoch in range(xargs.epochs):
-    # Reset the metrics at the start of the next epoch
-    train_loss.reset_states() ; train_accuracy.reset_states()
-    test_loss.reset_states() ; test_accuracy.reset_states()
-    cur_tau = xargs.tau_max - (xargs.tau_max-xargs.tau_min) * epoch / (xargs.epochs-1)
-    tf_tau = tf.cast(cur_tau, dtype=tf.float32, name='tau')
-    cur_lr = lr_schedular.get_lr(epoch)
-    tf.keras.backend.set_value(w_optimizer.lr, cur_lr)
-
-    for trn_imgs, trn_labels, val_imgs, val_labels in search_ds:
-      search_step(trn_imgs, trn_labels, val_imgs, val_labels, tf_tau)
-    genotype = model.genotype()
-    genotype = CellStructure(genotype)
-
-    #for test_images, test_labels in test_ds:
-    #  test_step(test_images, test_labels)
-
-    cur_lr = float(tf.keras.backend.get_value(w_optimizer.lr))
-    template = '{:} Epoch {:03d}/{:03d}, Train-Loss: {:.3f}, Train-Accuracy: {:.2f}%, Valid-Loss: {:.3f}, Valid-Accuracy: {:.2f}% | tau={:.3f} | lr={:.6f}'
-    print(template.format(time_string(), epoch+1, xargs.epochs,
-                          train_loss.result(),
-                          train_accuracy.result()*100,
-                          valid_loss.result(),
-                          valid_accuracy.result()*100,
-                          cur_tau,
-                          cur_lr))
-    print('{:} genotype : {:}\n{:}\n'.format(time_string(), genotype, model.get_np_alphas()))
-
-
-if __name__ == '__main__':
-  parser = argparse.ArgumentParser(description='NAS-Bench-201', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-  # training details
-  parser.add_argument('--epochs' , type=int , default= 250 , help='')
-  parser.add_argument('--tau_max' , type=float, default= 10 , help='')
-  parser.add_argument('--tau_min' , type=float, default= 0.1 , help='')
-  parser.add_argument('--w_lr_max' , type=float, default= 0.025, help='')
-  parser.add_argument('--w_lr_min' , type=float, default= 0.001, help='')
-  parser.add_argument('--w_weight_decay' , type=float, default=0.0005, help='')
-  parser.add_argument('--w_momentum' , type=float, default= 0.9 , help='')
-  parser.add_argument('--arch_learning_rate', type=float, default=0.0003, help='')
-  parser.add_argument('--arch_weight_decay' , type=float, default=0.001, help='')
-  # marco structure
-  parser.add_argument('--channel' , type=int , default=16, help='')
-  parser.add_argument('--num_cells' , type=int , default= 5, help='')
-  parser.add_argument('--max_nodes' , type=int , default= 4, help='')
-  args = parser.parse_args()
-  main( args )
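
For readers skimming the deleted script: search_step performs the bi-level GDAS update, first the network weights on the training split, then the architecture parameters (alphas) on the validation split, while the outer loop anneals both the weight learning rate (cosine) and the Gumbel-softmax temperature tau (linearly, from --tau_max down to --tau_min; the usage comment at the top shows that passing -1/-1 switches the script into DARTS mode). Below is a minimal, self-contained sketch of just these two schedules, using the argparse defaults from the script; it is an illustration, not code from the repository.

import math

def cosine_lr(epoch, total_epochs, lr_max=0.025, lr_min=0.001):
  # CosineAnnealingLR.get_lr for the warmup_epochs=0 case constructed in main()
  return lr_min + (lr_max - lr_min) * 0.5 * (1 + math.cos(math.pi * epoch / total_epochs))

def linear_tau(epoch, total_epochs, tau_max=10.0, tau_min=0.1):
  # the per-epoch temperature computed at the top of the training loop
  return tau_max - (tau_max - tau_min) * epoch / (total_epochs - 1)

for epoch in (0, 125, 249):  # first, middle, and last of the default 250 epochs
  print(epoch, round(cosine_lr(epoch, 250), 6), round(linear_tau(epoch, 250), 3))
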
diff --git a/exps-tf/test-invH.py b/exps-tf/test-invH.py
deleted file mode 100644
index b455506..0000000
--- a/exps-tf/test-invH.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os, sys, math, time, random, argparse
-import tensorflow as tf
-from pathlib import Path
-
-
-def test_a():
-  x = tf.Variable([[1.], [2.], [4.0]])
-  with tf.GradientTape(persistent=True) as g:
-    trn = tf.math.exp(tf.math.reduce_sum(x))
-    val = tf.math.cos(tf.math.reduce_sum(x))
-    dT_dx = g.gradient(trn, x)
-    dV_dx = g.gradient(val, x)
-  hess_vector = g.gradient(dT_dx, x, output_gradients=dV_dx)
-  print ('calculate ok : {:}'.format(hess_vector))
-
-def test_b():
-  cce = tf.keras.losses.SparseCategoricalCrossentropy()
-  L1 = tf.convert_to_tensor([0, 1, 2])
-  L2 = tf.convert_to_tensor([2, 0, 1])
-  B = tf.Variable([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
-  with tf.GradientTape(persistent=True) as g:
-    trn = cce(L1, B)
-    val = cce(L2, B)
-    dT_dx = g.gradient(trn, B)
-    dV_dx = g.gradient(val, B)
-  hess_vector = g.gradient(dT_dx, B, output_gradients=dV_dx)
-  print ('calculate ok : {:}'.format(hess_vector))
-
-def test_c():
-  cce = tf.keras.losses.CategoricalCrossentropy()
-  L1 = tf.convert_to_tensor([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
-  L2 = tf.convert_to_tensor([[0., 0., 1.], [0., 1., 0.], [1., 0., 0.]])
-  B = tf.Variable([[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
-  with tf.GradientTape(persistent=True) as g:
-    trn = cce(L1, B)
-    val = cce(L2, B)
-    dT_dx = g.gradient(trn, B)
-    dV_dx = g.gradient(val, B)
-  hess_vector = g.gradient(dT_dx, B, output_gradients=dV_dx)
-  print ('calculate ok : {:}'.format(hess_vector))
-
-if __name__ == '__main__':
-  print(tf.__version__)
-  test_c()
-  #test_b()
-  #test_a()
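
All three deleted tests compute the same quantity for different losses: a Hessian-vector product. Because dT_dx and dV_dx are produced while the persistent tape is still recording, the final g.gradient(dT_dx, ..., output_gradients=dV_dx) differentiates dT_dx a second time and contracts the result with dV_dx, yielding H(trn) @ v with v the gradient of the validation loss, i.e. the second-order term of the DARTS unrolled gradient. The following sketch is my verification, not code from the repository (assuming TF 2.x): it checks the mechanism against the closed form for the test_a setting, where every entry of the Hessian of exp(sum(x)) equals exp(sum(x)).

import tensorflow as tf

x = tf.Variable([[1.], [2.], [4.]])
with tf.GradientTape(persistent=True) as g:
  trn = tf.math.exp(tf.math.reduce_sum(x))
  val = tf.math.cos(tf.math.reduce_sum(x))
  dT_dx = g.gradient(trn, x)  # recorded by g, so it can be differentiated again
  dV_dx = g.gradient(val, x)  # the vector v = -sin(sum(x)) * ones
hvp = g.gradient(dT_dx, x, output_gradients=dV_dx)  # H(trn) @ v

# closed form: (H @ v)_i = exp(sum(x)) * sum_j v_j for every i
expected = tf.math.exp(tf.reduce_sum(x)) * tf.reduce_sum(dV_dx) * tf.ones_like(x)
print(tf.reduce_max(tf.abs(hvp - expected)))  # ~0 up to float error
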
diff --git a/exps/NAS-Bench-201/test-nas-api-vis.py b/exps/NAS-Bench-201/test-nas-api-vis.py
index f08ec5f..98ef293 100644
--- a/exps/NAS-Bench-201/test-nas-api-vis.py
+++ b/exps/NAS-Bench-201/test-nas-api-vis.py
@@ -94,11 +94,11 @@ def visualize_sss_info(api, dataset, vis_save_dir):
     params.append(info['params'])
     flops.append(info['flops'])
     # accuracy
-    info = api.get_more_info(index, dataset, hp='90')
+    info = api.get_more_info(index, dataset, hp='90', is_random=False)
     train_accs.append(info['train-accuracy'])
     test_accs.append(info['test-accuracy'])
     if dataset == 'cifar10':
-      info = api.get_more_info(index, 'cifar10-valid', hp='90')
+      info = api.get_more_info(index, 'cifar10-valid', hp='90', is_random=False)
       valid_accs.append(info['valid-accuracy'])
     else:
       valid_accs.append(info['valid-accuracy'])
@@ -182,11 +182,11 @@ def visualize_tss_info(api, dataset, vis_save_dir):
     params.append(info['params'])
     flops.append(info['flops'])
     # accuracy
-    info = api.get_more_info(index, dataset, hp='200')
+    info = api.get_more_info(index, dataset, hp='200', is_random=False)
     train_accs.append(info['train-accuracy'])
     test_accs.append(info['test-accuracy'])
     if dataset == 'cifar10':
-      info = api.get_more_info(index, 'cifar10-valid', hp='200')
+      info = api.get_more_info(index, 'cifar10-valid', hp='200', is_random=False)
       valid_accs.append(info['valid-accuracy'])
     else:
       valid_accs.append(info['valid-accuracy'])
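
Both hunks above make the same fix: the accuracy queries now pass is_random=False. Each architecture in the benchmark was trained under several random seeds; get_more_info with is_random=True (the default) reports one randomly chosen trial, so successive runs of this plotting script would disagree, while is_random=False returns the average over trials. A hedged usage sketch follows; the benchmark file name is assumed from the public NAS-Bench-201 v1.1 release and should be adjusted to your local copy.

from nas_201_api import NASBench201API

api = NASBench201API('NAS-Bench-201-v1_1-096897.pth', verbose=False)
# one randomly sampled trial per call: values may differ between calls
noisy = [api.get_more_info(0, 'cifar10-valid', hp='200', is_random=True)['valid-accuracy']
         for _ in range(3)]
# averaged over trials: deterministic, which is what the plots need
stable = api.get_more_info(0, 'cifar10-valid', hp='200', is_random=False)['valid-accuracy']
print(noisy, stable)
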
@@ -319,6 +319,68 @@ def visualize_rank_info(api, vis_save_dir, indicator):
   plt.close('all')
 
 
+def calculate_correlation(*vectors):
+  matrix = []
+  for i, vectori in enumerate(vectors):
+    x = []
+    for j, vectorj in enumerate(vectors):
+      x.append( np.corrcoef(vectori, vectorj)[0,1] )
+    matrix.append( x )
+  return np.array(matrix)
+
+
+def visualize_all_rank_info(api, vis_save_dir, indicator):
+  vis_save_dir = vis_save_dir.resolve()
+  # print ('{:} start to visualize {:} information'.format(time_string(), api))
+  vis_save_dir.mkdir(parents=True, exist_ok=True)
+
+  cifar010_cache_path = vis_save_dir / '{:}-cache-{:}-info.pth'.format('cifar10', indicator)
+  cifar100_cache_path = vis_save_dir / '{:}-cache-{:}-info.pth'.format('cifar100', indicator)
+  imagenet_cache_path = vis_save_dir / '{:}-cache-{:}-info.pth'.format('ImageNet16-120', indicator)
+  cifar010_info = torch.load(cifar010_cache_path)
+  cifar100_info = torch.load(cifar100_cache_path)
+  imagenet_info = torch.load(imagenet_cache_path)
+  indexes = list(range(len(cifar010_info['params'])))
+
+  print ('{:} start to visualize relative ranking'.format(time_string()))
+
+
+  dpi, width, height = 250, 3200, 1400
+  figsize = width / float(dpi), height / float(dpi)
+  LabelSize, LegendFontsize = 14, 14
+
+  fig, axs = plt.subplots(1, 2, figsize=figsize)
+  ax1, ax2 = axs
+
+  sns_size = 15
+  CoRelMatrix = calculate_correlation(cifar010_info['valid_accs'], cifar010_info['test_accs'], cifar100_info['valid_accs'], cifar100_info['test_accs'], imagenet_info['valid_accs'], imagenet_info['test_accs'])
+
+  sns.heatmap(CoRelMatrix, annot=True, annot_kws={'size':sns_size}, fmt='.3f', linewidths=0.5, ax=ax1,
+              xticklabels=['C10-V', 'C10-T', 'C100-V', 'C100-T', 'I120-V', 'I120-T'],
+              yticklabels=['C10-V', 'C10-T', 'C100-V', 'C100-T', 'I120-V', 'I120-T'])
+
+  selected_indexes, acc_bar = [], 92
+  for i, acc in enumerate(cifar010_info['test_accs']):
+    if acc > acc_bar: selected_indexes.append( i )
+  cifar010_valid_accs = np.array(cifar010_info['valid_accs'])[ selected_indexes ]
+  cifar010_test_accs  = np.array(cifar010_info['test_accs']) [ selected_indexes ]
+  cifar100_valid_accs = np.array(cifar100_info['valid_accs'])[ selected_indexes ]
+  cifar100_test_accs  = np.array(cifar100_info['test_accs']) [ selected_indexes ]
+  imagenet_valid_accs = np.array(imagenet_info['valid_accs'])[ selected_indexes ]
+  imagenet_test_accs  = np.array(imagenet_info['test_accs']) [ selected_indexes ]
+  CoRelMatrix = calculate_correlation(cifar010_valid_accs, cifar010_test_accs, cifar100_valid_accs, cifar100_test_accs, imagenet_valid_accs, imagenet_test_accs)
+
+  sns.heatmap(CoRelMatrix, annot=True, annot_kws={'size':sns_size}, fmt='.3f', linewidths=0.5, ax=ax2,
+              xticklabels=['C10-V', 'C10-T', 'C100-V', 'C100-T', 'I120-V', 'I120-T'],
+              yticklabels=['C10-V', 'C10-T', 'C100-V', 'C100-T', 'I120-V', 'I120-T'])
+  ax1.set_title('Correlation coefficient over ALL candidates')
+  ax2.set_title('Correlation coefficient over candidates with accuracy > {:}%'.format(acc_bar))
+  save_path = (vis_save_dir / '{:}-all-relative-rank.png'.format(indicator)).resolve()
+  fig.savefig(save_path, dpi=dpi, bbox_inches='tight', format='png')
+  print ('{:} save into {:}'.format(time_string(), save_path))
+  plt.close('all')
+
+
 if __name__ == '__main__':
   parser = argparse.ArgumentParser(description='NAS-Bench-X', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument('--save_dir', type=str, default='output/NAS-BENCH-202', help='Folder to save checkpoints and log.')
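
The new calculate_correlation helper fills an N x N matrix with pairwise Pearson coefficients via np.corrcoef(vectori, vectorj)[0, 1]. For equal-length vectors this agrees entry-for-entry with a single np.corrcoef call on the row-stacked array, which makes a convenient self-check; the accuracy data below is synthetic and purely illustrative.

import numpy as np

def calculate_correlation(*vectors):
  # same logic as the helper added above, condensed into one expression
  return np.array([[np.corrcoef(vi, vj)[0, 1] for vj in vectors] for vi in vectors])

rng = np.random.default_rng(0)
accs = rng.uniform(80, 95, size=(6, 100))  # six fake accuracy vectors, as in the heatmaps
assert np.allclose(calculate_correlation(*accs), np.corrcoef(accs))
print(calculate_correlation(*accs).round(3))
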
@@ -326,20 +388,19 @@ if __name__ == '__main__':
   # use for train the model
   args = parser.parse_args()
 
-  visualize_rank_info(None, Path('output/vis-nas-bench/'), 'tss')
-  visualize_rank_info(None, Path('output/vis-nas-bench/'), 'sss')
-
+  datasets = ['cifar10', 'cifar100', 'ImageNet16-120']
   api201 = NASBench201API(None, verbose=True)
-  visualize_tss_info(api201, 'cifar10', Path('output/vis-nas-bench'))
-  visualize_tss_info(api201, 'cifar100', Path('output/vis-nas-bench'))
-  visualize_tss_info(api201, 'ImageNet16-120', Path('output/vis-nas-bench'))
+  for xdata in datasets:
+    visualize_tss_info(api201, xdata, Path('output/vis-nas-bench'))
 
   api301 = NASBench301API(None, verbose=True)
-  visualize_sss_info(api301, 'cifar10', Path('output/vis-nas-bench'))
-  visualize_sss_info(api301, 'cifar100', Path('output/vis-nas-bench'))
-  visualize_sss_info(api301, 'ImageNet16-120', Path('output/vis-nas-bench'))
+  for xdata in datasets:
+    visualize_sss_info(api301, xdata, Path('output/vis-nas-bench'))
 
   visualize_info(None, Path('output/vis-nas-bench/'), 'tss')
   visualize_info(None, Path('output/vis-nas-bench/'), 'sss')
   visualize_rank_info(None, Path('output/vis-nas-bench/'), 'tss')
   visualize_rank_info(None, Path('output/vis-nas-bench/'), 'sss')
+
+  visualize_all_rank_info(None, Path('output/vis-nas-bench/'), 'tss')
+  visualize_all_rank_info(None, Path('output/vis-nas-bench/'), 'sss')
diff --git a/exps/vis/test.py b/exps/experimental/test-nas-plot.py
similarity index 100%
rename from exps/vis/test.py
rename to exps/experimental/test-nas-plot.py