Compare commits

11 commits: aead4df707 ... main

b9853a7341
24f15ad0fe
4df5615380
968157b657
33452adc3b
a7a6906a6d
c80cfb8cac
551abc31f3
aa4b38a0cc
f72990a675
ff85bba9cd

.gitignore (vendored) | 6
@@ -1,2 +1,6 @@
 __pycache__/
-datasets/
+./datasets/
+swap_results.csv
+swap_results_*
+cifar-10*
+NAS-Bench-201-*

analyze.py (new file) | 75
@@ -0,0 +1,75 @@
import csv
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import argparse


def plot(l, thousands, filename):
    length = len(l)
    threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]
    labels = ['0-10k', '10k-20k', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
    # Normalise the 10k-wide bucket counts into fractions of all networks.
    l = [i / length for i in l]
    l = l[:7]

    # Zoomed-in view: raw counts per 1k-wide bucket above 60k.
    thousands = thousands[60:]
    thousands_labels = [str(i) + 'k' for i in range(60, 70)]
    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(top=0.85)
    plt.title('Distribution of Swap Scores over 60k')
    plt.bar(thousands_labels, thousands)
    for i, v in enumerate(thousands):
        plt.text(i, v + 0.01, str(v), ha='center', va='bottom')
    plt.savefig(filename + '_60k.png')

    # Overview histogram, titled with the dataset name parsed from the filename.
    datasets = filename.split('_')[-1].split('.')[0]
    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(top=0.85)
    # plt.ylim(0, 0.3)
    plt.title('Distribution of Swap Scores in ' + datasets)
    plt.bar(labels, l)
    for i, v in enumerate(l):
        plt.text(i, v + 0.01, str(round(v, 2)), ha='center', va='bottom')
    plt.savefig(filename)


def analyse(filename):
    l = [0 for i in range(10)]          # counts per 10k-wide score bucket
    scores = []
    datas = []
    count = 0
    best_value = -1
    with open(filename) as file:
        reader = csv.reader(file)
        header = next(reader)           # first line is assumed to be the header row
        data = [row for row in reader]
    thousands = [0 for i in range(70)]  # counts per 1k-wide score bucket

    for row in data:
        score = float(row[0])
        best_value = max(best_value, score)
        # print(score)
        ind = int(score // 10000)
        l[ind] += 1
        thousands[int(score // 1000)] += 1
        acc = float(row[1])
        index = int(row[2])
        datas.append((score, acc, index))
        scores.append(score)

    print(max(scores))
    results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])
    print(results['swap_score'].max())
    print(best_value)
    plot(l, thousands, filename + '.png')
    return stats.spearmanr(results.swap_score, results.valid_acc)[0]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', type=str, help='Filename to analyze', default='swap_results.csv')

    args = parser.parse_args()

    print(analyse('output' + '/' + args.filename))
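
For reference, analyse() expects the comma-separated layout written by the evaluation script further down: a swap_score,valid_acc,index header followed by one row per network. A minimal smoke test with made-up values (a sketch; it assumes this module is importable as analyze and that output/ exists):

import csv

# Write a tiny CSV in the expected layout; the numbers are illustrative only.
with open('output/swap_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['swap_score', 'valid_acc', 'index'])
    writer.writerows([(61234.5, 93.1, 0), (58000.2, 91.7, 1), (40210.9, 88.4, 2)])

# from analyze import analyse
# print(analyse('output/swap_results.csv'))  # Spearman's rho over the three rows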

calculate_datasets_statistics.py (new file) | 69
@@ -0,0 +1,69 @@
# import torch
# import torchvision
# import torchvision.transforms as transforms

# # Load the CIFAR-10 dataset
# transform = transforms.Compose([transforms.ToTensor()])
# trainset = torchvision.datasets.CIFAR10(root='./datasets', train=True, download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=10000, shuffle=False, num_workers=2)

# # Load all of the data into memory
# data = next(iter(trainloader))
# images, _ = data

# # Compute the per-channel mean and standard deviation
# mean = images.mean([0, 2, 3])
# std = images.std([0, 2, 3])

# print(f'Mean: {mean}')
# print(f'Std: {std}')

# results:
# Mean: tensor([0.4935, 0.4834, 0.4472])
# Std: tensor([0.2476, 0.2446, 0.2626])

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import argparse

parser = argparse.ArgumentParser(description='Calculate mean and std of dataset')
parser.add_argument('--dataset', type=str, default='cifar10', help='dataset name')
parser.add_argument('--data_path', type=str, default='./datasets/cifar-10-batches-py', help='path to dataset image folder')

args = parser.parse_args()

# Dataset path and name
dataset_path = args.data_path
dataset_name = args.dataset

# Dataset transform (resize to 224x224, then convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load the dataset with ImageFolder
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=4)

# Accumulators for the per-channel mean and std
mean = torch.zeros(3)
std = torch.zeros(3)
nb_samples = 0

count = 0
for data in dataloader:
    count += 1
    print(f'Processing batch {count}/{len(dataloader)}', end='\r')
    batch_samples = data[0].size(0)
    # Flatten each image's spatial dimensions: (N, C, H*W)
    data = data[0].view(batch_samples, data[0].size(1), -1)
    mean += data.mean(2).sum(0)
    std += data.std(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples

print(f'Mean: {mean}')
print(f'Std: {std}')
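
Note that the loop above averages each image's per-channel std, a common shortcut that slightly underestimates the dataset-level std. A sketch of an exact alternative under the same setup (it assumes the dataloader defined above), accumulating per-channel sums and squared sums over every pixel:

channel_sum = torch.zeros(3)
channel_sq_sum = torch.zeros(3)
n_pixels = 0
for images, _ in dataloader:
    # Sum over batch, height and width; keep the channel dimension.
    channel_sum += images.sum(dim=[0, 2, 3])
    channel_sq_sum += (images ** 2).sum(dim=[0, 2, 3])
    n_pixels += images.size(0) * images.size(2) * images.size(3)

exact_mean = channel_sum / n_pixels
# Var[x] = E[x^2] - E[x]^2, applied per channel across all pixels.
exact_std = (channel_sq_sum / n_pixels - exact_mean ** 2).sqrt()
print(f'Mean: {exact_mean}')
print(f'Std: {exact_std}')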

@@ -39,35 +39,42 @@ parser.add_argument('--seed', default=0, type=int, help='random seed')
 parser.add_argument('--device', default="cuda", type=str, nargs='?', help='setup device (cpu, mps or cuda)')
 parser.add_argument('--repeats', default=32, type=int, nargs='?', help='times of calculating the training-free metric')
 parser.add_argument('--input_samples', default=16, type=int, nargs='?', help='input batch size for training-free metric')
+parser.add_argument('--datasets', default='cifar10', type=str, help='input datasets')
+parser.add_argument('--start_index', default=0, type=int, help='start index of the networks to evaluate')

 args = parser.parse_args()

 if __name__ == "__main__":

     device = torch.device(args.device)

     # arch_info = pd.read_csv(args.data_path+'/DARTS_archs_CIFAR10.csv', names=['genotype', 'valid_acc'], sep=',')

-    train_data, _, _ = get_datasets('cifar10', args.data_path, (args.input_samples, 3, 32, 32), -1)
+    train_data, _, _ = get_datasets(args.datasets, args.data_path, (args.input_samples, 3, 32, 32), -1)
     train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.input_samples, num_workers=0, pin_memory=True)
     loader = iter(train_loader)
     inputs, _ = next(loader)

     results = []

-    # nasbench_len = 15625
     nasbench_len = 15625
+    filename = f'output/swap_results_{args.datasets}.csv'
+    api_datasets = args.datasets
+    if args.datasets == 'aircraft':
+        api_datasets = 'cifar10'

     # for index, i in arch_info.iterrows():
-    for i in range(nasbench_len):
+    for ind in range(args.start_index, nasbench_len):
         # print(f'Evaluating network: {index}')
-        print(f'Evaluating network: {i}')
-
-        config = api.get_net_config(i, 'cifar10')
+        print(f'Evaluating network: {ind}')
+        config = api.get_net_config(ind, api_datasets)
         network = get_cell_based_tiny_net(config)
-        nas_results = api.query_by_index(i, 'cifar10')
-        acc = nas_results[111].get_eval('ori-test')
+        # nas_results = api.query_by_index(ind, 'cifar10')
+        # acc = nas_results[111].get_eval('ori-test')
+        # nas_results = api.get_more_info(ind, api_datasets, None, hp=200, is_random=False)
+        # acc = nas_results['test-accuracy']
+        acc = 99

-        print(type(network))
+        # print(type(network))
         start_time = time.time()

         # network = Network(3, 10, 1, eval(i.genotype))
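
The accuracy lookup is stubbed out here (acc = 99) so SWAP scores can be collected without the benchmark loaded; the commented lines show two query styles. A sketch of backfilling real accuracies later with the nas_201_api package (the benchmark file name and the availability of the 200-epoch statistics are assumptions carried over from the commented code):

from nas_201_api import NASBench201API

# Loads the full NAS-Bench-201 file (large; matches the NAS-Bench-201-*
# pattern added to .gitignore above).
api = NASBench201API('NAS-Bench-201-v1_1-096897.pth')
info = api.get_more_info(0, 'cifar10', None, hp='200', is_random=False)
print(info['test-accuracy'])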

@@ -93,13 +100,15 @@ if __name__ == "__main__":
         print(f'Average SWAP score: {np.mean(swap_score)}')
         print(f'Elapsed time: {end_time - start_time:.2f} seconds')

-        results.append([np.mean(swap_score), acc, i])
+        results.append([np.mean(swap_score), acc, ind])
+        with open(filename, 'a') as f:
+            f.write(f'{np.mean(swap_score)},{acc},{ind}\n')

     results = pd.DataFrame(results, columns=['swap_score', 'valid_acc', 'index'])
+    results.to_csv('output/swap_results.csv', float_format='%.4f', index=False)

     print()
     print(f'Spearman\'s Correlation Coefficient: {stats.spearmanr(results.swap_score, results.valid_acc)[0]}')
-    results.to_csv('swap_results.csv', float_format='%.4f', index=False)
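
Because each score is now appended to filename as soon as it is computed, an interrupted sweep can be resumed from where it stopped. A hypothetical resume helper, assuming the incremental CSV written above (which has no header row):

import csv
import os

filename = 'output/swap_results_cifar10.csv'
start_index = 0
if os.path.exists(filename):
    with open(filename) as f:
        rows = [row for row in csv.reader(f) if row]
    if rows:
        start_index = int(rows[-1][2]) + 1  # third column holds the network index
print(f'resume with: --start_index {start_index}')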

preprocess_aircraft.py (new file) | 53
@@ -0,0 +1,53 @@
import os
import shutil

# Dataset paths
dataset_path = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images'
test_output_path = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/test_sorted_images'
train_output_path = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/train_sorted_images'

# Label files, e.g. 'images_variant_trainval.txt'.
# There are two of them: one for the training/validation images, one for the test images.
test_labels_file = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images_variant_test.txt'
train_labels_file = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images_variant_train.txt'

# Create the output folders
if not os.path.exists(test_output_path):
    os.makedirs(test_output_path)
if not os.path.exists(train_output_path):
    os.makedirs(train_output_path)

# Read the label files
with open(test_labels_file, 'r') as f:
    test_lines = f.readlines()
with open(train_labels_file, 'r') as f:
    train_lines = f.readlines()

def sort_images(lines, output_path):
    count = 0
    for line in lines:
        count += 1
        print(f'Processing image {count}/{len(lines)}', end='\r')
        # Each line is '<image_id> <variant>'; the variant may contain spaces and slashes.
        parts = line.strip().split(' ')
        image_name = parts[0] + '.jpg'
        category = '_'.join(parts[1:]).replace('/', '_')

        # Create the category folder
        category_path = os.path.join(output_path, category)
        if not os.path.exists(category_path):
            os.makedirs(category_path)

        # Move the image into its category folder
        src = os.path.join(dataset_path, image_name)
        dst = os.path.join(category_path, image_name)
        if os.path.exists(src):
            shutil.move(src, dst)
        else:
            print(f'Image {image_name} not found!')

print("Sorting test images into folders by category...")
sort_images(test_lines, test_output_path)
print("Sorting train images into folders by category...")
sort_images(train_lines, train_output_path)

print("Images have been sorted into folders by category.")
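
The resulting layout (one subfolder per aircraft variant) is exactly what torchvision.datasets.ImageFolder expects, which is how the aircraft branch of get_datasets loads it below. A quick sanity check, assuming the train output path above:

from torchvision import datasets

train_set = datasets.ImageFolder(train_output_path)
print(len(train_set.classes))   # FGVC-Aircraft defines 100 variant classes
print(train_set.classes[:3])    # e.g. ['707-320', '727-200', '737-200']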

@@ -13,7 +13,8 @@ Dataset2Class = {'cifar10': 10,
                  'ImageNet16' : 1000,
                  'ImageNet16-120': 120,
                  'ImageNet16-150': 150,
-                 'ImageNet16-200': 200}
+                 'ImageNet16-200': 200,
+                 'aircraft': 100}

 class RandChannel(object):
     # randomly pick channels from input

@@ -46,6 +47,10 @@ def get_datasets(name, root, input_size, cutout=-1):
     elif name.startswith('ImageNet16'):
         mean = [0.481098, 0.45749, 0.407882]
         std = [0.247922, 0.240235, 0.255255]
+    elif name == 'aircraft':
+        mean = [0.4785, 0.5100, 0.5338]
+        std = [0.1845, 0.1830, 0.2060]
+
     else:
         raise TypeError("Unknow dataset : {:}".format(name))

@@ -55,6 +60,12 @@
         if cutout > 0 : lists += [CUTOUT(cutout)]
         train_transform = transforms.Compose(lists)
         test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
+    elif name == 'aircraft':
+        lists = [transforms.RandomCrop(input_size[1], padding=0), transforms.ToTensor(), transforms.Normalize(mean, std)]
+        if cutout > 0 : lists += [CUTOUT(cutout)]
+        train_transform = transforms.Compose(lists)
+        test_transform = transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor(), transforms.Normalize(mean, std)])
+
     elif name.startswith('ImageNet16'):
         lists = [transforms.RandomCrop(input_size[1], padding=0), transforms.ToTensor(), transforms.Normalize(mean, std), RandChannel(input_size[0])]
         if cutout > 0 : lists += [CUTOUT(cutout)]

@@ -86,9 +97,12 @@
         train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
         test_data = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
         assert len(train_data) == 50000 and len(test_data) == 10000
+    elif name == 'aircraft':
+        train_data = dset.ImageFolder(osp.join(root, 'train_sorted_images'), train_transform)
+        test_data = dset.ImageFolder(osp.join(root, 'test_sorted_images'), test_transform)
     elif name.startswith('imagenet-1k'):
         train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
-        test_data = dset.ImageFolder(osp.join(root, 'val'), test_transform)
+        test_data = dset.ImageFolder(osp.join(root, 'test'), test_transform)
     elif name == 'ImageNet16':
         root = osp.join(root, 'ImageNet16')
         train_data = ImageNet16(root, True , train_transform)
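
Taken together, these branches let the evaluation script load aircraft through the same entry point as the NAS-Bench datasets. A usage sketch mirroring the call in that script (the root path is an assumption; it must contain the train_sorted_images/ and test_sorted_images/ folders produced by preprocess_aircraft.py):

train_data, _, _ = get_datasets(
    'aircraft',
    '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data',
    (16, 3, 32, 32),  # (batch, channels, height, width), as in the evaluation script
    -1,               # cutout disabled
)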