9 Commits

Author SHA1 Message Date
mhz
d36e1d1077 adjust threshold for cifar100 2024-08-29 10:37:42 +02:00
mhz
82183d3df7 add read swap csv codes 2024-08-29 09:25:15 +02:00
mhz
c86db9b6ba add a test performance script. 2024-08-26 20:12:47 +02:00
mhz
a0473008a1 find the mysterious 94.37 2024-08-25 16:10:23 +02:00
mhz
05ee34e355 update the script to use nasbench-201 api 2024-08-21 10:40:00 +02:00
mhz
6d9db64a48 explore the 201 space script 2024-08-21 10:26:02 +02:00
mhz
3950a8438d set batch_y to 1 and want to test 15625 2024-08-20 22:15:25 +02:00
mhz
1fa2d49c11 set y's points 2024-08-20 21:57:47 +02:00
mhz
3c92e754d3 update the nasbench data 2024-08-20 09:24:39 +02:00
8 changed files with 47848 additions and 187672 deletions

View File

@@ -32,7 +32,7 @@ model:
ensure_connected: True
train:
# n_epochs: 5000
n_epochs: 10
n_epochs: 500
batch_size: 1200
lr: 0.0002
clip_grad: null

View File

@@ -25,7 +25,9 @@ from sklearn.model_selection import train_test_split
import utils as utils
from datasets.abstract_dataset import AbstractDatasetInfos, AbstractDataModule
from diffusion.distributions import DistributionNodes
# from naswot.score_networks import get_nasbench201_idx_score
from naswot.score_networks import get_nasbench201_idx_score
from naswot import nasspace
from naswot import datasets as dt
import networkx as nx
@@ -682,7 +684,7 @@ class Dataset(InMemoryDataset):
data_list = []
# len_data = len(self.api)
len_data = 1000
len_data = 15625
def check_valid_graph(nodes, edges):
if len(nodes) != edges.shape[0] or len(nodes) != edges.shape[1]:
return False
@@ -745,11 +747,9 @@ class Dataset(InMemoryDataset):
print(f'edges size: {edges.shape}, nodes size: {len(nodes)}')
return edges,nodes
def get_nasbench_201_val(idx):
pass
# def graph_to_graph_data(graph, idx):
def graph_to_graph_data(graph):
def graph_to_graph_data(graph, idx, train_loader, searchspace, args, device):
# def graph_to_graph_data(graph):
ops = graph[1]
adj = graph[0]
nodes = []
@@ -770,12 +770,58 @@ class Dataset(InMemoryDataset):
edge_index = torch.tensor(edges_list, dtype=torch.long).t()
edge_type = torch.tensor(edge_type, dtype=torch.long)
edge_attr = edge_type
y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
# y = get_nasbench_201_val(idx)
# y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
# y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
y = self.swap_scores[idx]
print(y, idx)
if y > 60000:
print(f'idx={idx}, y={y}')
y = torch.tensor([1, 1], dtype=torch.float).view(1, -1)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
else:
print(f'idx={idx}, y={y}')
y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
# return None
return data
graph_list = []
class Args:
pass
args = Args()
args.trainval = True
args.augtype = 'none'
args.repeat = 1
args.score = 'hook_logdet'
args.sigma = 0.05
args.nasspace = 'nasbench201'
args.batch_size = 128
args.GPU = '0'
args.dataset = 'cifar10'
args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
args.data_loc = '../cifardata/'
args.seed = 777
args.init = ''
args.save_loc = 'results'
args.save_string = 'naswot'
args.dropout = False
args.maxofn = 1
args.n_samples = 100
args.n_runs = 500
args.stem_out_channels = 16
args.num_stacks = 3
args.num_modules_per_stack = 3
args.num_labels = 1
searchspace = nasspace.get_search_space(args)
train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
self.swap_scores = []
import csv
# with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results.csv', 'r') as f:
with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results_cifar100.csv', 'r') as f:
reader = csv.reader(f)
header = next(reader)
data = [row for row in reader]
self.swap_scores = [float(row[0]) for row in data]
device = torch.device('cuda:2')
with tqdm(total = len_data) as pbar:
active_nodes = set()
file_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json'
@@ -785,25 +831,17 @@ class Dataset(InMemoryDataset):
flex_graph_list = []
flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json'
for graph in graph_list:
# arch_info = self.api.query_meta_info_by_index(i)
# results = self.api.query_by_index(i, 'cifar100')
print(f'iterate every graph in graph_list, here is {i}')
arch_info = graph['arch_str']
# results =
# nodes, edges = parse_architecture_string(arch_info.arch_str)
# ops, adj_matrix = parse_architecture_string(arch_info.arch_str, padding=4)
ops, adj_matrix, ori_nodes, ori_adj = parse_architecture_string(arch_info, padding=4)
# adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
for op in ops:
if op not in active_nodes:
active_nodes.add(op)
data = graph_to_graph_data((adj_matrix, ops))
# with open(flex_graph_path, 'a') as f:
# flex_graph = {
# 'adj_matrix': adj_matrix,
# 'ops': ops,
# }
# json.dump(flex_graph, f)
data = graph_to_graph_data((adj_matrix, ops),idx=i, train_loader=train_loader, searchspace=searchspace, args=args, device=device)
i += 1
if data is None:
pbar.update(1)
continue
flex_graph_list.append({
'adj_matrix':adj_matrix,
'ops': ops,
@@ -816,18 +854,12 @@ class Dataset(InMemoryDataset):
f.write(str(data.edge_attr))
data_list.append(data)
new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9, random_ratio=0.5)
flex_graph_list.append({
'adj_matrix':new_adj.tolist(),
'ops': new_ops,
})
# with open(flex_graph_path, 'w') as f:
# flex_graph = {
# new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9, random_ratio=0.5)
# flex_graph_list.append({
# 'adj_matrix':new_adj.tolist(),
# 'ops': new_ops,
# }
# json.dump(flex_graph, f)
data_list.append(graph_to_graph_data((new_adj, new_ops)))
# })
# data_list.append(graph_to_graph_data((new_adj, new_ops)))
# graph_list.append({
# "adj_matrix": adj_matrix,
@@ -859,6 +891,7 @@ class Dataset(InMemoryDataset):
# "seed": seed,
# }for seed, result in results.items()]
# })
# i += 1
pbar.update(1)
for graph in graph_list:
@@ -872,8 +905,8 @@ class Dataset(InMemoryDataset):
graph['ops'] = ops
with open(f'nasbench-201-graph.json', 'w') as f:
json.dump(graph_list, f)
with open(flex_graph_path, 'w') as f:
json.dump(flex_graph_list, f)
# with open(flex_graph_path, 'w') as f:
# json.dump(flex_graph_list, f)
torch.save(self.collate(data_list), self.processed_paths[0])
@@ -1148,7 +1181,8 @@ class DataInfos(AbstractDatasetInfos):
# ops_type[op] = len(ops_type)
# len_ops.add(len(ops))
# graphs.append((adj_matrix, ops))
graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json')
# graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json')
graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json')
# check first five graphs
for i in range(5):

View File

@@ -356,7 +356,8 @@ class Graph_DiT(pl.LightningModule):
to_generate = min(samples_left_to_generate, bs)
to_save = min(samples_left_to_save, bs)
chains_save = min(chains_left_to_save, bs)
batch_y = test_y_collection[batch_id : batch_id + to_generate]
# batch_y = test_y_collection[batch_id : batch_id + to_generate]
batch_y = torch.ones(to_generate, self.ydim_output, device=self.device)
cur_sample = self.sample_batch(batch_id, to_generate, batch_y, save_final=to_save,
keep_chain=chains_save, number_chain_steps=self.number_chain_steps)

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

85
graph_dit/exp_201/main.py Normal file
View File

@@ -0,0 +1,85 @@
import matplotlib.pyplot as plt
import pandas as pd
from nas_201_api import NASBench201API as API
# from naswot.score_networks import get_nasbench201_idx_score
# from naswot import datasets as dt
# from naswot import nasspace
# class Args():
# pass
# args = Args()
# args.trainval = True
# args.augtype = 'none'
# args.repeat = 1
# args.score = 'hook_logdet'
# args.sigma = 0.05
# args.nasspace = 'nasbench201'
# args.batch_size = 128
# args.GPU = '0'
# args.dataset = 'cifar10'
# args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
# args.data_loc = '../cifardata/'
# args.seed = 777
# args.init = ''
# args.save_loc = 'results'
# args.save_string = 'naswot'
# args.dropout = False
# args.maxofn = 1
# args.n_samples = 100
# args.n_runs = 500
# args.stem_out_channels = 16
# args.num_stacks = 3
# args.num_modules_per_stack = 3
# args.num_labels = 1
# searchspace = nasspace.get_search_space(args)
# train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
# device = torch.device('cuda:2')
# Plot how the CIFAR-10 'ori-test' accuracy of every NAS-Bench-201
# architecture is distributed over 10%-wide intervals.
source = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
api = API(source)

# Example list of percentages, accurate to two decimal places:
# percentages = [5.12, 15.78, 25.43, 35.22, 45.99, 55.34, 65.12, 75.68, 85.99, 95.25, 23.45, 12.34, 37.89, 58.67, 64.23, 72.15, 81.76, 99.99, 42.11, 61.58, 77.34, 14.56]
percentages = []
len_201 = 15625
for i in range(len_201):
    # Alternative score source that was tried and disabled:
    # percentage = get_nasbench201_idx_score(i, train_loader, searchspace, args, device)
    results = api.query_by_index(i, 'cifar10')
    result = results[111].get_eval('ori-test')
    # NOTE(review): the NAS-Bench-201 API documents get_eval() as returning a
    # dict of metrics (loss, accuracy, timings). pd.cut below needs plain
    # numbers, so keep only the accuracy. The isinstance guard leaves a bare
    # number untouched should an API version return one directly.
    if isinstance(result, dict):
        result = result['accuracy']
    percentages.append(result)

# Define the 10% intervals: edges at 0, 10, ..., 100.
bins = list(range(0, 101, 10))

# Bin the data and count how many values fall into each interval.
hist, bin_edges = pd.cut(percentages, bins=bins, right=False, retbins=True, include_lowest=True)
bin_counts = hist.value_counts().sort_index()
total_counts = len(percentages)
percentages_in_bins = (bin_counts / total_counts) * 100

# Draw the bar chart, one bar per interval, labelled with its share of the total.
plt.figure(figsize=(10, 6))
bars = plt.bar(bin_counts.index.astype(str), bin_counts.values, width=0.9, color='skyblue')
for bar, percentage in zip(bars, percentages_in_bins):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
             f'{percentage:.2f}%', ha='center', va='bottom')

# Add the title and axis labels.
plt.title('Distribution of Percentages in 10% Intervals')
plt.xlabel('Percentage Interval')
plt.ylabel('Count')

# Rotate the interval labels and write the chart to disk.
plt.xticks(rotation=45)
plt.savefig('barplog.png')

View File

@@ -0,0 +1 @@
{"source": "nasbench-201", "num_graph": 15625, "n_nodes_per_graph": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "max_n_nodes": 8, "max_n_edges": 8, "node_type_list": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "edge_type_list": [0.6666666666666666, 0.3333333333333333], "valencies": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "active_nodes": ["*", "input", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3", "skip_connect", "none"], "num_active_nodes": 7, "transition_E": [[[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], 
[0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]]}

File diff suppressed because one or more lines are too long

144
graph_dit/test_perf.py Normal file
View File

@@ -0,0 +1,144 @@
from nas_201_api import NASBench201API as API
import re
import pandas as pd
import json
import numpy as np
import argparse
# Parse the command line first so that `--help` and invalid arguments are
# handled without loading the benchmark file.
parser = argparse.ArgumentParser(
    description='Score generated NAS-Bench-201 graphs against the benchmark.')
parser.add_argument('--file_path', type=str, default='211035.txt',
                    help='name of the graph dump inside ./output_graphs/')
# Only these datasets have an entry in the accuracy-threshold table used
# further down in this script; any other value used to fail there with a
# KeyError, so reject it up front with a usage message instead.
parser.add_argument('--datasets', type=str, default='cifar10',
                    choices=['cifar10', 'cifar100'],
                    help='dataset whose test accuracy is looked up')
args = parser.parse_args()

# Benchmark handle used for all architecture look-ups below.
api = API('./NAS-Bench-201-v1_1-096897.pth')
def process_graph_data(text, row_width=8):
    """Parse a dump of generated graphs into a DataFrame.

    The text is split on the literal marker ``nodes:``; everything before the
    first marker is ignored. Inside each section the part before ``edges:``
    supplies the node labels (the first run of space-separated ``tensor(N)``
    tokens) and the part after it supplies the flattened adjacency values
    (every ``tensor(N)`` token found there).

    Sections that have no ``edges:`` marker or no node tokens are skipped as a
    whole, so the two columns always stay aligned row by row.

    Args:
        text: Raw contents of the dump file.
        row_width: Number of values per adjacency row when the flat edge list
            is folded into rows. Must be positive. Defaults to 8.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed graph and two columns:
        ``nodes`` (list of int) and ``edges`` (list of rows, each a list of
        int). It has zero rows when nothing could be parsed.
    """
    node_run = re.compile(r'(tensor\(\d+\) ?)+')
    tensor_value = re.compile(r'tensor\((\d+)\)')

    nodes_list = []
    edges_list = []
    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # No edge list for this graph: nothing usable to validate later.
            continue
        nodes_section, edge_section = parts[0], parts[1]

        # Look for node labels only in the node part, so an empty node list
        # cannot accidentally pick up tokens that belong to the edges.
        nodes_match = node_run.search(nodes_section)
        if nodes_match is None:
            continue

        nodes = [int(v) for v in tensor_value.findall(nodes_match.group(0))]
        flat_edges = [int(v) for v in tensor_value.findall(edge_section)]
        edge_rows = [flat_edges[start:start + row_width]
                     for start in range(0, len(flat_edges), row_width)]

        nodes_list.append(nodes)
        edges_list.append(edge_rows)

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }
    print(data)
    df = pd.DataFrame(data)
    print('df')
    # Preview the first parsed graph; an empty frame has no row 0 to show.
    if not df.empty:
        print(df['nodes'][0], df['edges'][0])
    return df
# Reference connectivity of the 8-node graph encoding checked below. Only the
# entries above the diagonal are compared against a candidate.
_NASBENCH201_ADJ = (
    (0, 1, 1, 0, 1, 0, 0, 0),
    (0, 0, 0, 1, 0, 1, 0, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 0),
)


def is_valid_nasbench201(adj, ops):
    """Return whether ``(adj, ops)`` encodes a graph with the reference layout.

    A graph is accepted when all of the following hold:

    * ``ops`` has 8 entries, starts with 0 and ends with 6;
    * every entry in between (positions 1..6) is one of 1..5;
    * ``adj`` is 8x8, contains only 0/1, and matches the reference matrix
      everywhere above the diagonal. Entries on and below the diagonal only
      have to be 0 or 1.

    Args:
        adj: Adjacency matrix as a sequence of rows.
        ops: Sequence of integer node labels.

    Returns:
        ``True`` if the graph passes every check, ``False`` otherwise.
        Malformed shapes yield ``False`` rather than an exception.
    """
    print(ops)
    n_nodes = len(_NASBENCH201_ADJ)

    # Reject wrong sizes first: they would otherwise index past the reference
    # matrix, or let a truncated matrix pass unchecked.
    if len(ops) != n_nodes or len(adj) != n_nodes:
        return False
    if ops[0] != 0 or ops[-1] != 6:
        return False
    # Every interior node, including position 1, must carry an operation label.
    if any(op not in (1, 2, 3, 4, 5) for op in ops[1:-1]):
        return False

    for i, row in enumerate(adj):
        if len(row) != n_nodes:
            return False
        for j, value in enumerate(row):
            if value not in (0, 1):
                return False
            if j > i and value != _NASBENCH201_ADJ[i][j]:
                return False
    return True
# Operation name for each integer node label; the list position is the label.
num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none', 'output']


def nodes_to_arch_str(nodes):
    """Build the architecture string for a sequence of node labels.

    Every label is first translated through ``num_to_op``. The names at
    positions 1..6 are then arranged into three ``|``-delimited groups joined
    by ``+``: one entry tagged ``~0``, two tagged ``~0``/``~1`` and three
    tagged ``~0``/``~1``/``~2``.

    Args:
        nodes: Sequence of integer node labels with at least 7 entries.

    Returns:
        The assembled architecture string.
    """
    names = [num_to_op[label] for label in nodes]
    first_group = f'|{names[1]}~0|'
    second_group = f'|{names[2]}~0|{names[3]}~1|'
    third_group = f'|{names[4]}~0|{names[5]}~1|{names[6]}~2|'
    return '+'.join((first_group, second_group, third_group))
filename = args.file_path
datasets_name = args.datasets

# Read the graph dump and parse it into one row per generated graph.
with open('./output_graphs/' + filename, 'r') as f:
    texts = f.read()
df = process_graph_data(texts)

valid = 0
not_valid = 0
scores = []

# Bucket boundaries (test accuracy) per dataset, and the histogram built from
# them: one '<T' bucket per boundary plus a final '>T_max' bucket.
thresholds = {
    'cifar10': [90, 91, 92, 93, 94],
    'cifar100': [68, 69, 70, 71, 72, 73],
}
dataset_thresholds = thresholds[datasets_name]
top_bucket = f'>{dataset_thresholds[-1]}'
dist = {f'<{threshold}': 0 for threshold in dataset_thresholds}
dist[top_bucket] = 0

for nodes, edges in zip(df['nodes'], df['edges']):
    if not is_valid_nasbench201(edges, nodes):
        not_valid += 1
        continue

    arch_str = nodes_to_arch_str(nodes)
    index = api.query_index_by_arch(arch_str)
    # NOTE(review): the NAS-Bench-201 API documents -1 as the "architecture
    # not found" result. Such a graph has no benchmark entry to score, so it
    # is counted as invalid instead of being passed on to get_more_info.
    if index < 0:
        not_valid += 1
        continue

    valid += 1
    res = api.get_more_info(index, datasets_name, None, hp=200, is_random=False)
    acc = res['test-accuracy']
    scores.append((index, acc))

    # Update the distribution: the first boundary above the accuracy wins;
    # anything at or above the last boundary lands in the top bucket.
    bucket = next((f'<{threshold}' for threshold in dataset_thresholds
                   if acc < threshold), top_bucket)
    dist[bucket] += 1

with open('./output_graphs/' + filename + '_' + datasets_name + '.json', 'w') as f:
    json.dump(scores, f)

print(scores)
print(valid, not_valid)
print(dist)
if scores:
    accuracies = [acc for _, acc in scores]
    print("mean: ", np.mean(accuracies))
    print("max: ", np.max(accuracies))
    print("min: ", np.min(accuracies))
else:
    # np.max / np.min raise on an empty sequence; report the situation instead.
    print("no valid architectures found; skipping accuracy statistics")