no need to read the api again and again

This commit is contained in:
mhz 2024-06-29 17:16:08 +02:00
parent df26eef77c
commit 66fe70028e

View File

@ -79,7 +79,7 @@ class DataModule(AbstractDataModule):
source = './NAS-Bench-201-v1_1-096897.pth'
dataset = Dataset(source=source, root=root_path, target_prop=target, transform=None)
self.dataset = dataset
self.api = dataset.api
# self.api = dataset.api
# if len(self.task.split('-')) == 2:
# train_index, val_index, test_index, unlabeled_index = self.fixed_split(dataset)
@ -628,12 +628,12 @@ class Dataset(InMemoryDataset):
self.target_prop = target_prop
source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
self.source = source
self.api = API(source) # Initialize NAS-Bench-201 API
print('API loaded')
# self.api = API(source) # Initialize NAS-Bench-201 API
# print('API loaded')
super().__init__(root, transform, pre_transform, pre_filter)
print(self.processed_paths[0]) #/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth.pt
print('Dataset initialized')
self.data, self.slices = torch.load(self.processed_paths[0])
print('Dataset initialized')
self.data.edge_attr = self.data.edge_attr.squeeze()
self.data.idx = torch.arange(len(self.data.y))
print(f"self.data={self.data}, self.slices={self.slices}")
@ -647,82 +647,146 @@ class Dataset(InMemoryDataset):
return [f'{self.source}.pt']
def process(self):
def parse_architecture_string(arch_str):
    """Split a NAS-Bench-201 style architecture string into nodes and edges.

    The string is cut into stages on '+', each stage (with its outer '|'
    stripped) into tokens on '|', and each token into ``operation~index``.

    Args:
        arch_str: architecture string, e.g.
            ``'|nor_conv_3x3~0|+|skip_connect~0|none~1|'``.

    Returns:
        A ``(nodes, edges)`` tuple. ``nodes`` starts with ``'input'``, holds
        one operation name per token in order of appearance and ends with
        ``'output'``. ``edges`` holds one ``(index, position)`` pair per
        token, where ``index`` is the integer after '~' and ``position`` is
        the slot that token's operation occupies in ``nodes``. No edge is
        created for the trailing ``'output'`` entry.

    Raises:
        ValueError: if a token is not of the form ``operation~index`` or the
            index is not an integer.
    """
    nodes = ['input']
    edges = []
    for stage in arch_str.split('+'):
        for token in stage.strip('|').split('|'):
            parts = token.split('~')
            if len(parts) != 2:
                # Fail with the offending token instead of a bare unpacking error.
                raise ValueError(
                    f'Malformed operation token {token!r} in architecture string {arch_str!r}'
                )
            operation, source_idx = parts
            # The new node will be stored at len(nodes), so that is the edge target.
            edges.append((int(source_idx), len(nodes)))
            nodes.append(operation)
    nodes.append('output')
    return nodes, edges
source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
self.api = API(source)
def create_graph(nodes, edges):
    """Return a directed graph with one labelled node per entry of ``nodes``.

    Node ``i`` carries the attribute ``label=nodes[i]``; ``edges`` is passed
    straight to ``add_edges_from``.
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(
        (position, {'label': name}) for position, name in enumerate(nodes)
    )
    graph.add_edges_from(edges)
    return graph
def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
    """Convert an architecture string into a ``Data`` object plus its node names.

    Node features are the ``bonds`` codes of the parsed nodes; every edge is
    typed with the ``bonds`` code of its end node. ``y`` is a 1 x k float
    tensor holding ``sa, sc, target`` followed by ``target2`` / ``target3``
    depending on which of them are given.

    Returns:
        ``(data, nodes)`` where ``nodes`` is the list of node names produced
        by ``parse_architecture_string``.
    """
    nodes, edges = parse_architecture_string(arch_str)

    # Encode node names through the enclosing `bonds` lookup table.
    labels = [bonds[name] for name in nodes]
    assert 0 not in labels, f'Invalid node label: {labels}'
    x = torch.LongTensor(labels)
    print(f'in initialize Dataset, arch_to_Graph x={x}')

    pairs = [(src, dst) for src, dst in edges]
    end_codes = [bonds[nodes[dst]] for _, dst in edges]  # edge type = code of the end node
    edge_index = torch.tensor(pairs, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(end_codes, dtype=torch.long).view(-1, 1)

    # target3 takes precedence: when it is given, target2 is included as well.
    targets = [sa, sc, target]
    if target3 is not None:
        targets += [target2, target3]
    elif target2 is not None:
        targets.append(target2)
    y = torch.tensor(targets, dtype=torch.float).view(1, -1)

    print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y), nodes
bonds = {
'nor_conv_1x1': 1,
'nor_conv_3x3': 2,
'avg_pool_3x3': 3,
'skip_connect': 4,
'output': 5,
'none': 6,
'input': 7
}
# Prepare to process NAS-Bench-201 data
data_list = []
len_data = len(self.api) # Number of architectures
with tqdm(total=len_data) as pbar:
for arch_index in range(len_data):
arch_info = self.api.query_meta_info_by_index(arch_index)
arch_str = arch_info.arch_str
sa = np.random.rand() # Placeholder for synthetic accessibility
sc = np.random.rand() # Placeholder for substructure count
target = np.random.rand() # Placeholder for target value
target2 = np.random.rand() # Placeholder for second target value
target3 = np.random.rand() # Placeholder for third target value
len_data = len(self.api)
data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
def graph_to_graph_data(graph):
    """Turn an ``(adjacency, ops)`` pair into a ``Data`` object.

    ``graph[0]`` is indexed as ``adj[start][end]`` and ``graph[1]`` is the
    list of operations. Each entry equal to 1 yields two edges, one in each
    direction, both with edge attribute 1. ``y`` is a 1 x 1 zero tensor.

    NOTE(review): ``op_type`` and ``i`` are not parameters; they are read
    from an enclosing scope when the function is called. ``i`` is presumably
    the index of the surrounding loop - confirm before reusing this helper
    elsewhere.
    """
    ops = graph[1]
    adj = graph[0]

    x = torch.LongTensor([op_type[op] for op in ops])

    count = len(ops)
    edges_list = []
    for start in range(count):
        for end in range(count):
            if adj[start][end] == 1:
                # Store the edge in both directions.
                edges_list.extend(((start, end), (end, start)))

    edge_index = torch.tensor(edges_list, dtype=torch.long).t()
    edge_attr = torch.tensor([1] * len(edges_list), dtype=torch.long)
    y = torch.tensor([0], dtype=torch.float).view(1, -1)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
graph_list = []
with tqdm(total = len_data) as pbar:
active_nodes = set()
for i in range(len_data):
arch_info = self.api.query_meta_info_by_index(i)
nodes, edges = parse_architecture_string(arch_info.arch_str)
adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
for op in ops:
if op not in active_nodes:
active_nodes.add(op)
graph_list.append({
"adj_matrix": adj_matrix,
"ops": ops,
"idx": i
})
data = graph_to_graph_data((adj_matrix, ops))
data_list.append(data)
pbar.update(1)
for graph in graph_list:
adj_matrix = graph['adj_matrix']
if isinstance(adj_matrix, np.ndarray):
adj_matrix = adj_matrix.tolist()
graph['adj_matrix'] = adj_matrix
ops = graph['ops']
if isinstance(ops, np.ndarray):
ops = ops.tolist()
graph['ops'] = ops
with open(f'nasbench-201-graph.json', 'w') as f:
json.dump(graph_list, f)
torch.save(self.collate(data_list), self.processed_paths[0])
# def parse_architecture_string(arch_str):
# stages = arch_str.split('+')
# nodes = ['input']
# edges = []
# for stage in stages:
# operations = stage.strip('|').split('|')
# for op in operations:
# operation, idx = op.split('~')
# idx = int(idx)
# edges.append((idx, len(nodes))) # Add edge from idx to the new node
# nodes.append(operation)
# nodes.append('output') # Add the output node
# return nodes, edges
# def create_graph(nodes, edges):
# G = nx.DiGraph()
# for i, node in enumerate(nodes):
# G.add_node(i, label=node)
# G.add_edges_from(edges)
# return G
# def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
# nodes, edges = parse_architecture_string(arch_str)
# node_labels = [bonds[node] for node in nodes] # Replace with appropriate encoding if necessary
# assert 0 not in node_labels, f'Invalid node label: {node_labels}'
# x = torch.LongTensor(node_labels)
# print(f'in initialize Dataset, arch_to_Graph x={x}')
# edges_list = [(start, end) for start, end in edges]
# edge_type = [bonds[nodes[end]] for start, end in edges] # Example: using end node type as edge type
# edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous()
# edge_type = torch.tensor(edge_type, dtype=torch.long)
# edge_attr = edge_type.view(-1, 1)
# if target3 is not None:
# y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1)
# elif target2 is not None:
# y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1)
# else:
# y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1)
# print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
# data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
# return data, nodes
# bonds = {
# 'nor_conv_1x1': 1,
# 'nor_conv_3x3': 2,
# 'avg_pool_3x3': 3,
# 'skip_connect': 4,
# 'output': 5,
# 'none': 6,
# 'input': 7
# }
# # Prepare to process NAS-Bench-201 data
# data_list = []
# len_data = len(self.api) # Number of architectures
# with tqdm(total=len_data) as pbar:
# for arch_index in range(len_data):
# arch_info = self.api.query_meta_info_by_index(arch_index)
# arch_str = arch_info.arch_str
# sa = np.random.rand() # Placeholder for synthetic accessibility
# sc = np.random.rand() # Placeholder for substructure count
# target = np.random.rand() # Placeholder for target value
# target2 = np.random.rand() # Placeholder for second target value
# target3 = np.random.rand() # Placeholder for third target value
# data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
# data_list.append(data)
# pbar.update(1)
# torch.save(self.collate(data_list), self.processed_paths[0])
class Dataset_origin(InMemoryDataset):
def __init__(self, source, root, target_prop=None,
transform=None, pre_transform=None, pre_filter=None):
@ -841,7 +905,7 @@ class DataInfos(AbstractDatasetInfos):
self.task = task_name
self.task_type = tasktype_dict.get(task_name, "regression")
self.ensure_connected = cfg.model.ensure_connected
self.api = dataset.api
# self.api = dataset.api
datadir = cfg.dataset.datadir
@ -853,20 +917,34 @@ class DataInfos(AbstractDatasetInfos):
ops_type = {}
len_ops = set()
# api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth')
for i in range(length):
arch_info = self.api.query_meta_info_by_index(i)
nodes, edges = parse_architecture_string(arch_info.arch_str)
adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
def read_adj_ops_from_json(filename):
    """Load ``(adjacency matrix, operations)`` pairs from a JSON file.

    The file must contain a list of objects, each with an
    ``'adjacency_matrix'`` entry (converted to ``np.ndarray``) and an
    ``'operations'`` entry (returned as loaded).

    NOTE(review): the JSON written by ``process()`` in this file uses the
    keys ``'adj_matrix'`` / ``'ops'``; confirm the file passed here is a
    different artifact that really carries the keys read below.

    Args:
        filename: path of the JSON file to read.

    Returns:
        List of ``(np.ndarray, operations)`` tuples in file order.

    Raises:
        KeyError: if an item lacks one of the two expected keys.
    """
    # Pin the encoding so parsing does not depend on the platform default.
    with open(filename, 'r', encoding='utf-8') as json_file:
        records = json.load(json_file)
    return [
        (np.array(record['adjacency_matrix']), record['operations'])
        for record in records
    ]
# for i in range(length):
# arch_info = self.api.query_meta_info_by_index(i)
# nodes, edges = parse_architecture_string(arch_info.arch_str)
# adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
# if i < 5:
# print("Adjacency Matrix:")
# print(adj_matrix)
# print("Operations List:")
# print(ops)
for op in ops:
if op not in ops_type:
ops_type[op] = len(ops_type)
len_ops.add(len(ops))
graphs.append((adj_matrix, ops))
# for op in ops:
# if op not in ops_type:
# ops_type[op] = len(ops_type)
# len_ops.add(len(ops))
# graphs.append((adj_matrix, ops))
graphs = read_adj_ops_from_json(f'nasbench-201.meta.json')
# check first five graphs
for i in range(5):
@ -879,13 +957,13 @@ class DataInfos(AbstractDatasetInfos):
self.max_n_nodes = meta_dict['max_n_nodes']
self.original_max_n_nodes = meta_dict['max_n_nodes']
self.n_nodes = torch.Tensor(meta_dict['n_nodes_per_graph'])
self.edge_types = torch.Tensor(meta_dict['edge_type_dist'])
self.edge_types = torch.Tensor(meta_dict['edge_type_list'])
self.transition_E = torch.Tensor(meta_dict['transition_E'])
self.node_decoder = meta_dict['active_nodes']
node_types = torch.Tensor(meta_dict['node_type_dist'])
node_types = torch.Tensor(meta_dict['node_type_list'])
active_index = (node_types > 0).nonzero().squeeze()
self.node_types = torch.Tensor(meta_dict['node_type_dist'])[active_index]
self.node_types = torch.Tensor(meta_dict['node_type_list'])[active_index]
self.nodes_dist = DistributionNodes(self.n_nodes)
self.active_index = active_index