# Compare commits

22 Commits

`main ... d44900c8ba`
| Author | SHA1 | Date |
|---|---|---|
| | d44900c8ba | |
| | 73324083ce | |
| | 0c3cfb195a | |
| | 4d1dea1179 | |
| | 7147679c42 | |
| | 817ef04c58 | |
| | dd31fda8d5 | |
| | 572f030677 | |
| | ba008ae54c | |
| | f5911be781 | |
| | be8bb16f61 | |
| | 0fc6f6e686 | |
| | d57575586d | |
| | 7274b3f606 | |
| | 66fe70028e | |
| | df26eef77c | |
| | 222470a43c | |
| | a7f7010da7 | |
| | 14186fa97f | |
| | a222c514d9 | |
| | 062a27b83f | |
| | 0c7c525680 | |
```diff
@@ -2,6 +2,7 @@ general:
   name: 'graph_dit'
   wandb: 'disabled'
   gpus: 1
+  gpu_number: 3
   resume: null
   test_only: null
   sample_every_val: 2500
@@ -10,7 +11,7 @@ general:
   chains_to_save: 1
   log_every_steps: 50
   number_chain_steps: 8
-  final_model_samples_to_generate: 10000
+  final_model_samples_to_generate: 100
   final_model_samples_to_save: 20
   final_model_chains_to_save: 1
   enable_progress_bar: False
@@ -30,7 +31,7 @@ model:
   lambda_train: [1, 10]  # node and edge training weight
   ensure_connected: True
 train:
-  n_epochs: 10000
+  n_epochs: 5000
   batch_size: 1200
   lr: 0.0002
   clip_grad: null
```
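This config is consumed through Hydra/OmegaConf elsewhere in the compare view (e.g. `devices=[cfg.general.gpu_number]` in the Trainer setup). A minimal sketch of how these keys resolve, assuming only that `omegaconf` is installed:

```python
from omegaconf import OmegaConf

# toy copy of the changed keys, not the full config file
cfg = OmegaConf.create("""
general:
  name: graph_dit
  gpus: 1
  gpu_number: 3
train:
  n_epochs: 5000
  batch_size: 1200
""")
print(cfg.general.gpu_number, cfg.train.n_epochs)  # 3 5000
```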
```diff
@@ -116,7 +116,7 @@ class AbstractDatasetInfos:
     def compute_input_output_dims(self, datamodule):
         example_batch = datamodule.example_batch()
         example_batch_x = torch.nn.functional.one_hot(example_batch.x, num_classes=118).float()[:, self.active_index]
-        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=10).float()
+        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=2).float()
 
         self.input_dims = {'X': example_batch_x.size(1),
                            'E': example_batch_edge_attr.size(1),
@@ -127,4 +127,19 @@ class AbstractDatasetInfos:
         print('input dims')
         print(self.input_dims)
         print('output dims')
         print(self.output_dims)
+    def compute_graph_input_output_dims(self, datamodule):
+        example_batch = datamodule.example_batch()
+        example_batch_x = torch.nn.functional.one_hot(example_batch.x, num_classes=8).float()[:, self.active_index]
+        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=2).float()
+
+        self.input_dims = {'X': example_batch_x.size(1),
+                           'E': example_batch_edge_attr.size(1),
+                           'y': example_batch['y'].size(1)}
+        self.output_dims = {'X': example_batch_x.size(1),
+                            'E': example_batch_edge_attr.size(1),
+                            'y': example_batch['y'].size(1)}
+        print('input dims')
+        print(self.input_dims)
+        print('output dims')
+        print(self.output_dims)
```
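The shift from 118 atom classes / 10 bond classes to 8 op classes / 2 edge classes is easy to sanity-check in isolation. A minimal sketch with toy indices (not taken from the dataset):

```python
import torch

x = torch.tensor([7, 1, 2, 5])  # toy op labels, e.g. input / nor_conv_1x1 / nor_conv_3x3 / output
x_onehot = torch.nn.functional.one_hot(x, num_classes=8).float()
print(x_onehot.shape)  # torch.Size([4, 8]) -> 'X' dim is 8 before active_index masking

edge_attr = torch.tensor([0, 1, 1])  # edges are now binary: absent / present
e_onehot = torch.nn.functional.one_hot(edge_attr, num_classes=2).float()
print(e_onehot.shape)  # torch.Size([3, 2]) -> 'E' dim is 2
```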
```diff
@@ -39,6 +39,16 @@ op_to_atom = {
     'none': 'S',           # Sulfur for no operation
     'output': 'He'         # Helium for output
 }
+
+op_type = {
+    'nor_conv_1x1': 1,
+    'nor_conv_3x3': 2,
+    'avg_pool_3x3': 3,
+    'skip_connect': 4,
+    'output': 5,
+    'none': 6,
+    'input': 7
+}
 class DataModule(AbstractDataModule):
     def __init__(self, cfg):
         self.datadir = cfg.dataset.datadir
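```

`op_type` assigns each NAS-Bench-201 cell operation an integer label starting at 1; the molecule pipeline it replaces asserted that label 0 never appears, and this numbering preserves that. A small sketch of the encoding on an invented ops list:

```python
ops = ['input', 'nor_conv_3x3', 'skip_connect', 'output']
codes = [op_type[op] for op in ops]
print(codes)  # [7, 2, 4, 5]
```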
```diff
@@ -50,12 +60,12 @@ class DataModule(AbstractDataModule):
 
     def prepare_data(self) -> None:
         target = getattr(self.cfg.dataset, 'guidance_target', None)
-        print("target", target)
+        print("target", target) # nasbench-201
         # try:
         #     base_path = pathlib.Path(os.path.realpath(__file__)).parents[2]
         # except NameError:
         # base_path = pathlib.Path(os.getcwd()).parent[2]
-        base_path = '/home/stud/hanzhang/Graph-Dit'
+        base_path = '/home/stud/hanzhang/nasbenchDiT'
         root_path = os.path.join(base_path, self.datadir)
         self.root_path = root_path
 
@@ -68,13 +78,16 @@ class DataModule(AbstractDataModule):
         # Dataset has target property, root path, and transform
         source = './NAS-Bench-201-v1_1-096897.pth'
         dataset = Dataset(source=source, root=root_path, target_prop=target, transform=None)
+        self.dataset = dataset
+        # self.api = dataset.api
 
         # if len(self.task.split('-')) == 2:
        #     train_index, val_index, test_index, unlabeled_index = self.fixed_split(dataset)
         # else:
         train_index, val_index, test_index, unlabeled_index = self.random_data_split(dataset)
 
-        self.train_index, self.val_index, self.test_index, self.unlabeled_index = train_index, val_index, test_index, unlabeled_index
+        self.train_index, self.val_index, self.test_index, self.unlabeled_index = (
+            train_index, val_index, test_index, unlabeled_index)
         train_index, val_index, test_index, unlabeled_index = torch.LongTensor(train_index), torch.LongTensor(val_index), torch.LongTensor(test_index), torch.LongTensor(unlabeled_index)
         if len(unlabeled_index) > 0:
             train_index = torch.cat([train_index, unlabeled_index], dim=0)
```
```diff
@@ -175,6 +188,27 @@ class DataModule(AbstractDataModule):
         smiles = Chem.MolToSmiles(mol)
         return smiles
 
+    def get_train_graphs(self):
+        train_graphs = []
+        test_graphs = []
+        for graph in self.train_dataset:
+            train_graphs.append(graph)
+        for graph in self.test_dataset:
+            test_graphs.append(graph)
+        return train_graphs, test_graphs
+
+
+    # def get_train_smiles(self):
+    #     filename = f'{self.task}.csv.gz'
+    #     df = pd.read_csv(f'{self.root_path}/raw/{filename}')
+    #     df_test = df.iloc[self.test_index]
+    #     df = df.iloc[self.train_index]
+    #     smiles_list = df['smiles'].tolist()
+    #     smiles_list_test = df_test['smiles'].tolist()
+    #     smiles_list = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiles_list]
+    #     smiles_list_test = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiles_list_test]
+    #     return smiles_list, smiles_list_test
+
     def get_train_smiles(self):
         train_smiles = []
         test_smiles = []
```
```diff
@@ -319,6 +353,121 @@ class DataModule_original(AbstractDataModule):
     def test_dataloader(self):
         return self.test_loader
 
+def new_graphs_to_json(graphs, filename):
+    source_name = "nasbench-201"
+    num_graph = len(graphs)
+
+    node_name_list = []
+    node_count_list = []
+    node_name_list.append('*')
+
+    for op_name in op_type:
+        node_name_list.append(op_name)
+        node_count_list.append(0)
+
+    node_count_list.append(0)
+    n_nodes_per_graph = [0] * num_graph
+    edge_count_list = [0, 0]
+    valencies = [0] * (len(op_type) + 1)
+    transition_E = np.zeros((len(op_type) + 1, len(op_type) + 1, 2))
+
+    n_node_list = []
+    n_edge_list = []
+
+    for graph in graphs:
+        ops = graph[1]
+        adj = graph[0]
+
+        n_node = len(ops)
+        n_edge = len(ops)
+        n_node_list.append(n_node)
+        n_edge_list.append(n_edge)
+
+        n_nodes_per_graph[n_node] += 1
+        cur_node_count_arr = np.zeros(len(op_type) + 1)
+
+        for op in ops:
+            node = op
+            # if node == '*':
+            #     node_count_list[-1] += 1
+            #     cur_node_count_arr[-1] += 1
+            # else:
+            node_count_list[node] += 1
+            cur_node_count_arr[node] += 1
+            try:
+                valencies[node] += 1
+            except:
+                print('int(op_type[node])', int(node))
+
+        transition_E_temp = np.zeros((len(op_type) + 1, len(op_type) + 1, 2))
+        for i in range(n_node):
+            for j in range(n_node):
+                if i == j or adj[i][j] == 0:
+                    continue
+                start_node, end_node = i, j
+
+                start_index = ops[start_node]
+                end_index = ops[end_node]
+                bond_index = 1
+                edge_count_list[bond_index] += 2
+
+                transition_E[start_index, end_index, bond_index] += 2
+                transition_E[end_index, start_index, bond_index] += 2
+                transition_E_temp[start_index, end_index, bond_index] += 2
+                transition_E_temp[end_index, start_index, bond_index] += 2
+
+        edge_count_list[0] += n_node * (n_node - 1) - n_edge * 2
+        cur_tot_edge = cur_node_count_arr.reshape(-1, 1) * cur_node_count_arr.reshape(1, -1) * 2
+        # print(f"cur_tot_edge={cur_tot_edge}, shape: {cur_tot_edge.shape}")
+        cur_tot_edge = cur_tot_edge - np.diag(cur_node_count_arr) * 2
+        transition_E[:, :, 0] += cur_tot_edge - transition_E_temp.sum(axis=-1)
+        assert (cur_tot_edge > transition_E_temp.sum(axis=-1)).sum() >= 0
+
+    n_nodes_per_graph = np.array(n_nodes_per_graph) / np.sum(n_nodes_per_graph)
+    n_nodes_per_graph = n_nodes_per_graph.tolist()[:51]
+
+    node_count_list = np.array(node_count_list) / np.sum(node_count_list)
+    print('processed meta info: ------', filename, '------')
+    print('len node_count_list', len(node_count_list))
+    print('len node_name_list', len(node_name_list))
+    active_nodes = np.array(node_name_list)[node_count_list > 0]
+    active_nodes = active_nodes.tolist()
+    node_count_list = node_count_list.tolist()
+
+    edge_count_list = np.array(edge_count_list) / np.sum(edge_count_list)
+    edge_count_list = edge_count_list.tolist()
+    valencies = np.array(valencies) / np.sum(valencies)
+    valencies = valencies.tolist()
+
+    no_edge = np.sum(transition_E, axis=-1) == 0
+    first_elt = transition_E[:, :, 0]
+    first_elt[no_edge] = 1
+    transition_E[:, :, 0] = first_elt
+
+    transition_E = transition_E / np.sum(transition_E, axis=-1, keepdims=True)
+
+    meta_dict = {
+        'source': source_name,
+        'num_graph': num_graph,
+        'n_nodes_per_graph': n_nodes_per_graph,
+        'max_n_nodes': max(n_node_list),
+        'max_n_edges': max(n_edge_list),
+        'node_type_list': node_count_list,
+        'edge_type_list': edge_count_list,
+        'valencies': valencies,
+        'active_nodes': active_nodes,
+        'num_active_nodes': len(active_nodes),
+        'transition_E': transition_E.tolist(),
+    }
+
+    with open(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-meta.json', 'w') as f:
+        json.dump(meta_dict, f)
+
+    return meta_dict
+
+
+
+
 def graphs_to_json(graphs, filename):
     bonds = {
         'nor_conv_1x1': 1,
```
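`new_graphs_to_json` normalizes `transition_E` into a per-node-type-pair distribution over edge types; pairs that never co-occur get a dummy count in the "no edge" channel so the division never hits zero. A small numpy sketch of that step with invented counts:

```python
import numpy as np

transition_E = np.zeros((2, 2, 2))
transition_E[0, 1, 1] = 4.0   # type-0 -> type-1 pairs seen with an edge
transition_E[0, 1, 0] = 12.0  # ... and seen without one

no_edge = np.sum(transition_E, axis=-1) == 0
first_elt = transition_E[:, :, 0]
first_elt[no_edge] = 1        # unseen pairs default to 'no edge'
transition_E[:, :, 0] = first_elt

transition_E = transition_E / np.sum(transition_E, axis=-1, keepdims=True)
print(transition_E[0, 1])  # [0.75 0.25]
print(transition_E[1, 0])  # [1. 0.]
```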
```diff
@@ -466,7 +615,7 @@ def graphs_to_json(graphs, filename):
         'atom_type_dist': atom_count_list,
         'bond_type_dist': bond_count_list,
         'valencies': valencies,
-        'active_atoms': [atom_name_list[i] for i in range(118) if atom_count_list[i] > 0],
+        'active_nodes': [atom_name_list[i] for i in range(118) if atom_count_list[i] > 0],
         'num_atom_type': len([atom_name_list[i] for i in range(118) if atom_count_list[i] > 0]),
         'transition_E': transition_E.tolist(),
     }
```
```diff
@@ -477,14 +626,17 @@ def graphs_to_json(graphs, filename):
 class Dataset(InMemoryDataset):
     def __init__(self, source, root, target_prop=None, transform=None, pre_transform=None, pre_filter=None):
         self.target_prop = target_prop
-        source = '/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+        source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
         self.source = source
-        self.api = API(source)  # Initialize NAS-Bench-201 API
-        print('API loaded')
+        # self.api = API(source)  # Initialize NAS-Bench-201 API
+        # print('API loaded')
         super().__init__(root, transform, pre_transform, pre_filter)
         print('Dataset initialized')
-        print(self.processed_paths[0])
+        print(self.processed_paths[0]) #/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth.pt
         self.data, self.slices = torch.load(self.processed_paths[0])
+        print('Dataset initialized')
         self.data.edge_attr = self.data.edge_attr.squeeze()
+        self.data.idx = torch.arange(len(self.data.y))
+        print(f"self.data={self.data}, self.slices={self.slices}")
 
     @property
     def raw_file_names(self):
```
```diff
@@ -495,82 +647,172 @@ class Dataset(InMemoryDataset):
         return [f'{self.source}.pt']
 
     def process(self):
-        def parse_architecture_string(arch_str):
-            stages = arch_str.split('+')
-            nodes = ['input']
-            edges = []
-
-            for stage in stages:
-                operations = stage.strip('|').split('|')
-                for op in operations:
-                    operation, idx = op.split('~')
-                    idx = int(idx)
-                    edges.append((idx, len(nodes)))  # Add edge from idx to the new node
-                    nodes.append(operation)
-            nodes.append('output')  # Add the output node
-            return nodes, edges
+        source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+        self.api = API(source)
 
-        def create_graph(nodes, edges):
-            G = nx.DiGraph()
-            for i, node in enumerate(nodes):
-                G.add_node(i, label=node)
-            G.add_edges_from(edges)
-            return G
 
-        def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
-            nodes, edges = parse_architecture_string(arch_str)
-
-            node_labels = [bonds[node] for node in nodes]  # Replace with appropriate encoding if necessary
-            assert 0 not in node_labels, f'Invalid node label: {node_labels}'
-            x = torch.LongTensor(node_labels)
-            print(f'in initialize Dataset, arch_to_Graph x={x}')
-
-            edges_list = [(start, end) for start, end in edges]
-            edge_type = [bonds[nodes[end]] for start, end in edges]  # Example: using end node type as edge type
-            edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous()
-            edge_type = torch.tensor(edge_type, dtype=torch.long)
-            edge_attr = edge_type.view(-1, 1)
-
-            if target3 is not None:
-                y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1)
-            elif target2 is not None:
-                y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1)
-            else:
-                y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1)
-
-            print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
-            data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
-            return data, nodes
-
-        bonds = {
-            'nor_conv_1x1': 1,
-            'nor_conv_3x3': 2,
-            'avg_pool_3x3': 3,
-            'skip_connect': 4,
-            'output': 5,
-            'none': 6,
-            'input': 7
-        }
-
-        # Prepare to process NAS-Bench-201 data
         data_list = []
-        len_data = len(self.api)  # Number of architectures
-        with tqdm(total=len_data) as pbar:
-            for arch_index in range(len_data):
-                arch_info = self.api.query_meta_info_by_index(arch_index)
-                arch_str = arch_info.arch_str
-                sa = np.random.rand()  # Placeholder for synthetic accessibility
-                sc = np.random.rand()  # Placeholder for substructure count
-                target = np.random.rand()  # Placeholder for target value
-                target2 = np.random.rand()  # Placeholder for second target value
-                target3 = np.random.rand()  # Placeholder for third target value
+        len_data = len(self.api)
 
-                data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
+        def graph_to_graph_data(graph):
+            ops = graph[1]
+            adj = graph[0]
+            nodes = []
+            for op in ops:
+                nodes.append(op_type[op])
+            x = torch.LongTensor(nodes)
+
+            edges_list = []
+            edge_type = []
+            for start in range(len(ops)):
+                for end in range(len(ops)):
+                    if adj[start][end] == 1:
+                        edges_list.append((start, end))
+                        edge_type.append(1)
+                        edges_list.append((end, start))
+                        edge_type.append(1)
+
+            edge_index = torch.tensor(edges_list, dtype=torch.long).t()
+            edge_type = torch.tensor(edge_type, dtype=torch.long)
+            edge_attr = edge_type
+            y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
+            data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
+            return data
+        graph_list = []
 
+        with tqdm(total=len_data) as pbar:
+            active_nodes = set()
+            for i in range(len_data):
+                arch_info = self.api.query_meta_info_by_index(i)
+                results = self.api.query_by_index(i, 'cifar100')
+                nodes, edges = parse_architecture_string(arch_info.arch_str)
+                adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
+                for op in ops:
+                    if op not in active_nodes:
+                        active_nodes.add(op)
+
+                graph_list.append({
+                    "adj_matrix": adj_matrix,
+                    "ops": ops,
+                    "idx": i,
+                    "train": [{
+                        "iepoch": result.get_train()['iepoch'],
+                        "loss": result.get_train()['loss'],
+                        "accuracy": result.get_train()['accuracy'],
+                        "cur_time": result.get_train()['cur_time'],
+                        "all_time": result.get_train()['all_time'],
+                        "seed": seed,
+                    } for seed, result in results.items()],
+                    "valid": [{
+                        "iepoch": result.get_eval('x-valid')['iepoch'],
+                        "loss": result.get_eval('x-valid')['loss'],
+                        "accuracy": result.get_eval('x-valid')['accuracy'],
+                        "cur_time": result.get_eval('x-valid')['cur_time'],
+                        "all_time": result.get_eval('x-valid')['all_time'],
+                        "seed": seed,
+                    } for seed, result in results.items()],
+                    "test": [{
+                        "iepoch": result.get_eval('x-test')['iepoch'],
+                        "loss": result.get_eval('x-test')['loss'],
+                        "accuracy": result.get_eval('x-test')['accuracy'],
+                        "cur_time": result.get_eval('x-test')['cur_time'],
+                        "all_time": result.get_eval('x-test')['all_time'],
+                        "seed": seed,
+                    } for seed, result in results.items()]
+                })
+                data = graph_to_graph_data((adj_matrix, ops))
                 data_list.append(data)
                 pbar.update(1)
 
+
+        for graph in graph_list:
+            adj_matrix = graph['adj_matrix']
+            if isinstance(adj_matrix, np.ndarray):
+                adj_matrix = adj_matrix.tolist()
+                graph['adj_matrix'] = adj_matrix
+            ops = graph['ops']
+            if isinstance(ops, np.ndarray):
+                ops = ops.tolist()
+                graph['ops'] = ops
+        with open(f'nasbench-201-graph.json', 'w') as f:
+            json.dump(graph_list, f)
+
         torch.save(self.collate(data_list), self.processed_paths[0])
 
+        # def parse_architecture_string(arch_str):
+        #     stages = arch_str.split('+')
+        #     nodes = ['input']
+        #     edges = []
+
+        #     for stage in stages:
+        #         operations = stage.strip('|').split('|')
+        #         for op in operations:
+        #             operation, idx = op.split('~')
+        #             idx = int(idx)
+        #             edges.append((idx, len(nodes)))  # Add edge from idx to the new node
+        #             nodes.append(operation)
+        #     nodes.append('output')  # Add the output node
+        #     return nodes, edges
+
+        # def create_graph(nodes, edges):
+        #     G = nx.DiGraph()
+        #     for i, node in enumerate(nodes):
+        #         G.add_node(i, label=node)
+        #     G.add_edges_from(edges)
+        #     return G
+
+        # def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
+        #     nodes, edges = parse_architecture_string(arch_str)
+
+        #     node_labels = [bonds[node] for node in nodes]  # Replace with appropriate encoding if necessary
+        #     assert 0 not in node_labels, f'Invalid node label: {node_labels}'
+        #     x = torch.LongTensor(node_labels)
+        #     print(f'in initialize Dataset, arch_to_Graph x={x}')
+
+        #     edges_list = [(start, end) for start, end in edges]
+        #     edge_type = [bonds[nodes[end]] for start, end in edges]  # Example: using end node type as edge type
+        #     edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous()
+        #     edge_type = torch.tensor(edge_type, dtype=torch.long)
+        #     edge_attr = edge_type.view(-1, 1)
+
+        #     if target3 is not None:
+        #         y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1)
+        #     elif target2 is not None:
+        #         y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1)
+        #     else:
+        #         y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1)
+
+        #     print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
+        #     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
+        #     return data, nodes
+
+        # bonds = {
+        #     'nor_conv_1x1': 1,
+        #     'nor_conv_3x3': 2,
+        #     'avg_pool_3x3': 3,
+        #     'skip_connect': 4,
+        #     'output': 5,
+        #     'none': 6,
+        #     'input': 7
+        # }
+
+        # # Prepare to process NAS-Bench-201 data
+        # data_list = []
+        # len_data = len(self.api)  # Number of architectures
+        # with tqdm(total=len_data) as pbar:
+        #     for arch_index in range(len_data):
+        #         arch_info = self.api.query_meta_info_by_index(arch_index)
+        #         arch_str = arch_info.arch_str
+        #         sa = np.random.rand()  # Placeholder for synthetic accessibility
+        #         sc = np.random.rand()  # Placeholder for substructure count
+        #         target = np.random.rand()  # Placeholder for target value
+        #         target2 = np.random.rand()  # Placeholder for second target value
+        #         target3 = np.random.rand()  # Placeholder for third target value
+
+        #         data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
+        #         data_list.append(data)
+        #         pbar.update(1)
+
+        # torch.save(self.collate(data_list), self.processed_paths[0])
 
 class Dataset_origin(InMemoryDataset):
     def __init__(self, source, root, target_prop=None,
                  transform=None, pre_transform=None, pre_filter=None):
```
```diff
@@ -656,7 +898,7 @@ class Dataset_origin(InMemoryDataset):
         torch.save(self.collate(data_list), self.processed_paths[0])
 
 def parse_architecture_string(arch_str):
-    print(arch_str)
+    # print(arch_str)
     steps = arch_str.split('+')
     nodes = ['input']  # Start with input node
     edges = []
@@ -676,7 +918,7 @@ def create_adj_matrix_and_ops(nodes, edges):
         adj_matrix[src][dst] = 1
     return adj_matrix, nodes
 class DataInfos(AbstractDatasetInfos):
-    def __init__(self, datamodule, cfg):
+    def __init__(self, datamodule, cfg, dataset):
         tasktype_dict = {
             'hiv_b': 'classification',
             'bace_b': 'classification',
@@ -689,6 +931,7 @@ class DataInfos(AbstractDatasetInfos):
         self.task = task_name
         self.task_type = tasktype_dict.get(task_name, "regression")
         self.ensure_connected = cfg.model.ensure_connected
+        # self.api = dataset.api
 
         datadir = cfg.dataset.datadir
 
```
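NAS-Bench-201 encodes a cell as three `+`-separated stages of `op~predecessor` tokens, and `parse_architecture_string` unrolls each token into a new node plus an edge from its predecessor (the commented-out copy inside `Dataset.process` shows the full logic). A worked example on an illustrative arch string (values invented):

```python
nodes, edges = parse_architecture_string(
    '|nor_conv_3x3~0|+|nor_conv_3x3~0|avg_pool_3x3~1|+'
    '|skip_connect~0|nor_conv_3x3~1|skip_connect~2|'
)
# nodes: ['input', 'nor_conv_3x3', 'nor_conv_3x3', 'avg_pool_3x3',
#         'skip_connect', 'nor_conv_3x3', 'skip_connect', 'output']
# edges: [(0, 1), (0, 2), (1, 3), (0, 4), (1, 5), (2, 6)]
```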
```diff
@@ -699,36 +942,55 @@ class DataInfos(AbstractDatasetInfos):
         length = 15625
         ops_type = {}
         len_ops = set()
-        api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth')
-        for i in range(length):
-            arch_info = api.query_meta_info_by_index(i)
-            nodes, edges = parse_architecture_string(arch_info.arch_str)
-            adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
-            if i < 5:
-                print("Adjacency Matrix:")
-                print(adj_matrix)
-                print("Operations List:")
-                print(ops)
-            for op in ops:
-                if op not in ops_type:
-                    ops_type[op] = len(ops_type)
-            len_ops.add(len(ops))
-            graphs.append((adj_matrix, ops))
+        # api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth')
 
-        meta_dict = graphs_to_json(graphs, 'nasbench-201')
-
+        def read_adj_ops_from_json(filename):
+            with open(filename, 'r') as json_file:
+                data = json.load(json_file)
+
+            adj_ops_pairs = []
+            for item in data:
+                adj_matrix = np.array(item['adj_matrix'])
+                ops = item['ops']
+                ops = [op_type[op] for op in ops]
+                adj_ops_pairs.append((adj_matrix, ops))
+
+            return adj_ops_pairs
+        # for i in range(length):
+        #     arch_info = self.api.query_meta_info_by_index(i)
+        #     nodes, edges = parse_architecture_string(arch_info.arch_str)
+        #     adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
+            # if i < 5:
+            #     print("Adjacency Matrix:")
+            #     print(adj_matrix)
+            #     print("Operations List:")
+            #     print(ops)
+            # for op in ops:
+            #     if op not in ops_type:
+            #         ops_type[op] = len(ops_type)
+            # len_ops.add(len(ops))
+            # graphs.append((adj_matrix, ops))
+        graphs = read_adj_ops_from_json(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json')
+
+        # check first five graphs
+        for i in range(5):
+            print(f'graph {i} : {graphs[i]}')
+        print(f'ops_type: {ops_type}')
+
+        meta_dict = new_graphs_to_json(graphs, 'nasbench-201')
         self.base_path = base_path
-        self.active_atoms = meta_dict['active_atoms']
-        self.max_n_nodes = meta_dict['max_node']
-        self.original_max_n_nodes = meta_dict['max_node']
-        self.n_nodes = torch.Tensor(meta_dict['n_atoms_per_mol_dist'])
-        self.edge_types = torch.Tensor(meta_dict['bond_type_dist'])
+        self.active_nodes = meta_dict['active_nodes']
+        self.max_n_nodes = meta_dict['max_n_nodes']
+        self.original_max_n_nodes = meta_dict['max_n_nodes']
+        self.n_nodes = torch.Tensor(meta_dict['n_nodes_per_graph'])
+        self.edge_types = torch.Tensor(meta_dict['edge_type_list'])
         self.transition_E = torch.Tensor(meta_dict['transition_E'])
 
-        self.atom_decoder = meta_dict['active_atoms']
-        node_types = torch.Tensor(meta_dict['atom_type_dist'])
+        self.node_decoder = meta_dict['active_nodes']
+        node_types = torch.Tensor(meta_dict['node_type_list'])
         active_index = (node_types > 0).nonzero().squeeze()
-        self.node_types = torch.Tensor(meta_dict['atom_type_dist'])[active_index]
+        self.node_types = torch.Tensor(meta_dict['node_type_list'])[active_index]
         self.nodes_dist = DistributionNodes(self.n_nodes)
         self.active_index = active_index
 
```
```diff
@@ -923,11 +1185,11 @@ def compute_meta(root, source_name, train_index, test_index):
         'transition_E': tansition_E.tolist(),
         }
 
-    with open(f'{root}/{source_name}.meta.json', "w") as f:
+    with open(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench201.meta.json', "w") as f:
         json.dump(meta_dict, f)
 
     return meta_dict
 
 
 if __name__ == "__main__":
-    pass
+    dataset = Dataset(source='nasbench', root='/home/stud/hanzhang/nasbenchDiT/graph-dit', target_prop='Class', transform=None)
```
```diff
@@ -65,10 +65,11 @@ def reverse_tensor(x):
 
 def sample_discrete_features(probX, probE, node_mask, step=None, add_nose=True):
     ''' Sample features from multinomial distribution with given probabilities (probX, probE, proby)
-        :param probX: bs, n, dx_out        node features
-        :param probE: bs, n, n, de_out     edge features
-        :param proby: bs, dy_out           global features.
+        :param probX: bs, n, dx_out        node features        1200 8 7
+        :param probE: bs, n, n, de_out     edge features        1200 8 8 2
+        :param proby: bs, dy_out           global features.     1200 8
     '''
+    # print(f"sample_discrete_features in: probX: {probX.shape}, probE: {probE.shape}, node_mask: {node_mask.shape}")
     bs, n, _ = probX.shape
 
     # Noise X
@@ -97,8 +98,11 @@ def sample_discrete_features(probX, probE, node_mask, step=None, add_nose=True):
 
     # Sample E
     E_t = probE.multinomial(1).reshape(bs, n, n)    # (bs, n, n)
+    # print(f"sample_discrete_features out: X_t: {X_t.shape}, E_t: {E_t.shape}")
     E_t = torch.triu(E_t, diagonal=1)
+    # print(f"sample_discrete_features out: X_t: {X_t.shape}, E_t: {E_t.shape}")
     E_t = (E_t + torch.transpose(E_t, 1, 2))
+    # print(f"sample_discrete_features out: X_t: {X_t.shape}, E_t: {E_t.shape}")
 
     return PlaceHolder(X=X_t, E=E_t, y=torch.zeros(bs, 0).type_as(X_t))
```
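The triu-then-transpose step forces the sampled edge matrix to describe an undirected graph: only the strict upper triangle survives and is then mirrored below the diagonal. A toy check:

```python
import torch

E = torch.tensor([[[0, 1, 1],
                   [1, 0, 0],
                   [0, 1, 0]]])
E = torch.triu(E, diagonal=1)     # keep strict upper triangle: [[0,1,1],[0,0,0],[0,0,0]]
E = E + torch.transpose(E, 1, 2)  # mirror it back down
print(E)
# tensor([[[0, 1, 1],
#          [1, 0, 0],
#          [1, 0, 0]]])
```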
```diff
@@ -103,16 +103,25 @@ class MarginalTransition:
         self.e_marginals = e_marginals # Dx, De
         self.xe_conditions = xe_conditions
 
-        self.u_x = x_marginals.unsqueeze(0).expand(self.X_classes, -1).unsqueeze(0) # 1, Dx, Dx
-        self.u_e = e_marginals.unsqueeze(0).expand(self.E_classes, -1).unsqueeze(0) # 1, De, De
-        self.u_xe = xe_conditions.unsqueeze(0) # 1, Dx, De
-        self.u_ex = ex_conditions.unsqueeze(0) # 1, De, Dx
+        self.u_x = x_marginals.unsqueeze(0).expand(self.X_classes, -1).unsqueeze(0) # 1, Dx, Dx 1 7 7
+        self.u_e = e_marginals.unsqueeze(0).expand(self.E_classes, -1).unsqueeze(0) # 1, De, De 1 2 2
+        self.u_xe = xe_conditions.unsqueeze(0) # 1, Dx, De 1 7 2
+        self.u_ex = ex_conditions.unsqueeze(0) # 1, De, Dx 1 2 7
         self.u = self.get_union_transition(self.u_x, self.u_e, self.u_xe, self.u_ex, n_nodes) # 1, Dx + n*De, Dx + n*De
+        # print(f"Shape of u_x: {self.u_x.shape}")
+        # print(f"Shape of u_e: {self.u_e.shape}")
+        # print(f"Shape of u_xe: {self.u_xe.shape}")
+        # print(f"Shape of u_ex: {self.u_ex.shape}")
+        # print(f"Shape of u: {self.u.shape}")
 
     def get_union_transition(self, u_x, u_e, u_xe, u_ex, n_nodes):
+        # print(f"before processing Shape of u_e: {u_e.shape}")
+        # print(f"before processing Shape of u_ex: {u_ex.shape}")
         u_e = u_e.repeat(1, n_nodes, n_nodes) # (1, n*de, n*de)
         u_xe = u_xe.repeat(1, 1, n_nodes) # (1, dx, n*de)
         u_ex = u_ex.repeat(1, n_nodes, 1) # (1, n*de, dx)
+        # print(f"After processing Shape of u_ex: {u_ex.shape}")
+        # print(f"After processing Shape of u_e: {u_e.shape}")
         u0 = torch.cat([u_x, u_xe], dim=2) # (1, dx, dx + n*de)
         u1 = torch.cat([u_ex, u_e], dim=2) # (1, n*de, dx + n*de)
         u = torch.cat([u0, u1], dim=1) # (1, dx + n*de, dx + n*de)
```
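`get_union_transition` tiles the four per-class blocks into one square matrix of side `dx + n*de`. A shape-only sketch using the dimensions from the comments above (`dx = 7`, `de = 2`) and an assumed `n = 8`:

```python
import torch

dx, de, n = 7, 2, 8  # node classes, edge classes, max node count (n assumed)
u_x  = torch.rand(1, dx, dx)
u_e  = torch.rand(1, de, de)
u_xe = torch.rand(1, dx, de)
u_ex = torch.rand(1, de, dx)

u_e  = u_e.repeat(1, n, n)   # (1, n*de, n*de)
u_xe = u_xe.repeat(1, 1, n)  # (1, dx, n*de)
u_ex = u_ex.repeat(1, n, 1)  # (1, n*de, dx)
u = torch.cat([torch.cat([u_x, u_xe], dim=2),
               torch.cat([u_ex, u_e], dim=2)], dim=1)
print(u.shape)  # torch.Size([1, 23, 23]) = (1, dx + n*de, dx + n*de)
```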
```diff
@@ -13,11 +13,11 @@ from metrics.abstract_metrics import SumExceptBatchMetric, SumExceptBatchKL, NLL
 import utils
 
 class Graph_DiT(pl.LightningModule):
-    # def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools):
-    def __init__(self, cfg, dataset_infos, visualization_tools):
+    def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools):
+    # def __init__(self, cfg, dataset_infos, visualization_tools):
 
         super().__init__()
-        # self.save_hyperparameters(ignore=['train_metrics', 'sampling_metrics'])
+        self.save_hyperparameters(ignore=['train_metrics', 'sampling_metrics'])
         self.test_only = cfg.general.test_only
         self.guidance_target = getattr(cfg.dataset, 'guidance_target', None)
 
@@ -57,8 +57,8 @@ class Graph_DiT(pl.LightningModule):
         self.test_E_logp = SumExceptBatchMetric()
         self.test_y_collection = []
 
-        # self.train_metrics = train_metrics
-        # self.sampling_metrics = sampling_metrics
+        self.train_metrics = train_metrics
+        self.sampling_metrics = sampling_metrics
 
         self.visualization_tools = visualization_tools
         self.max_n_nodes = dataset_infos.max_n_nodes
@@ -179,9 +179,9 @@ class Graph_DiT(pl.LightningModule):
     @torch.no_grad()
     def validation_step(self, data, i):
         data_x = F.one_hot(data.x, num_classes=118).float()[:, self.active_index]
-        data_edge_attr = F.one_hot(data.edge_attr, num_classes=5).float()
+        data_edge_attr = F.one_hot(data.edge_attr, num_classes=10).float()
         dense_data, node_mask = utils.to_dense(data_x, data.edge_index, data_edge_attr, data.batch, self.max_n_nodes)
-        dense_data = dense_data.mask(node_mask)
+        dense_data = dense_data.mask(node_mask, collapse=True)
         noisy_data = self.apply_noise(dense_data.X, dense_data.E, data.y, node_mask)
         pred = self.forward(noisy_data)
         nll = self.compute_val_loss(pred, noisy_data, dense_data.X, dense_data.E, data.y, node_mask, test=False)
```
```diff
@@ -11,9 +11,13 @@ import utils
 from datasets import dataset
 from diffusion_model import Graph_DiT
 from metrics.molecular_metrics_train import TrainMolecularMetricsDiscrete
+from metrics.molecular_metrics_train import TrainGraphMetricsDiscrete
 from metrics.molecular_metrics_sampling import SamplingMolecularMetrics
+from metrics.molecular_metrics_sampling import SamplingGraphMetrics
+
 
 from analysis.visualization import MolecularVisualization
+from analysis.visualization import GraphVisualization
 
 warnings.filterwarnings("ignore", category=UserWarning)
 torch.set_float32_matmul_precision("medium")
@@ -78,15 +82,20 @@ def main(cfg: DictConfig):
 
     datamodule = dataset.DataModule(cfg)
     datamodule.prepare_data()
-    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg)
-    # train_smiles, reference_smiles = datamodule.get_train_smiles()
+    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg, dataset=datamodule.dataset)
+    train_smiles, reference_smiles = datamodule.get_train_smiles()
+    # train_graphs, reference_graphs = datamodule.get_train_graphs()
 
     # get input output dimensions
     dataset_infos.compute_input_output_dims(datamodule=datamodule)
-    # train_metrics = TrainMolecularMetricsDiscrete(dataset_infos)
+    train_metrics = TrainMolecularMetricsDiscrete(dataset_infos)
+    # train_metrics = TrainGraphMetricsDiscrete(dataset_infos)
 
-    # sampling_metrics = SamplingMolecularMetrics(
-    #     dataset_infos, train_smiles, reference_smiles
+    sampling_metrics = SamplingMolecularMetrics(
+        dataset_infos, train_smiles, reference_smiles
     )
+    # sampling_metrics = SamplingGraphMetrics(
+    #     dataset_infos, train_graphs, reference_graphs
+    # )
     visualization_tools = MolecularVisualization(dataset_infos)
 
@@ -135,5 +144,65 @@ def main(cfg: DictConfig):
     else:
         trainer.test(model, datamodule=datamodule, ckpt_path=cfg.general.test_only)
 
+@hydra.main(
+    version_base="1.1", config_path="../configs", config_name="config"
+)
+def test(cfg: DictConfig):
+    datamodule = dataset.DataModule(cfg)
+    datamodule.prepare_data()
+    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg, dataset=datamodule.dataset)
+    train_graphs, reference_graphs = datamodule.get_train_graphs()
+
+    dataset_infos.compute_input_output_dims(datamodule=datamodule)
+    train_metrics = TrainGraphMetricsDiscrete(dataset_infos)
+
+    sampling_metrics = SamplingGraphMetrics(
+        dataset_infos, train_graphs, reference_graphs
+    )
+
+    visualization_tools = GraphVisualization(dataset_infos)
+
+    model_kwargs = {
+        "dataset_infos": dataset_infos,
+        "train_metrics": train_metrics,
+        "sampling_metrics": sampling_metrics,
+        "visualization_tools": visualization_tools,
+    }
+
+    if cfg.general.test_only:
+        cfg, _ = get_resume(cfg, model_kwargs)
+        os.chdir(cfg.general.test_only.split("checkpoints")[0])
+    elif cfg.general.resume is not None:
+        cfg, _ = get_resume_adaptive(cfg, model_kwargs)
+        os.chdir(cfg.general.resume.split("checkpoints")[0])
+    # os.environ["CUDA_VISIBLE_DEVICES"] = cfg.general.gpu_number
+    model = Graph_DiT(cfg=cfg, **model_kwargs)
+    trainer = Trainer(
+        gradient_clip_val=cfg.train.clip_grad,
+        # accelerator="cpu",
+        accelerator="gpu"
+        if torch.cuda.is_available() and cfg.general.gpus > 0
+        else "cpu",
+        devices=[cfg.general.gpu_number]
+        if torch.cuda.is_available() and cfg.general.gpus > 0
+        else None,
+        max_epochs=cfg.train.n_epochs,
+        enable_checkpointing=False,
+        check_val_every_n_epoch=cfg.train.check_val_every_n_epoch,
+        val_check_interval=cfg.train.val_check_interval,
+        strategy="ddp" if cfg.general.gpus > 1 else "auto",
+        enable_progress_bar=cfg.general.enable_progress_bar,
+        callbacks=[],
+        reload_dataloaders_every_n_epochs=0,
+        logger=[],
+    )
+
+    if not cfg.general.test_only:
+        print("start testing fit method")
+        trainer.fit(model, datamodule=datamodule, ckpt_path=cfg.general.resume)
+        if cfg.general.save_model:
+            trainer.save_checkpoint(f"checkpoints/{cfg.general.name}/last.ckpt")
+        trainer.test(model, datamodule=datamodule)
+
 if __name__ == "__main__":
     main()
+    test()
```
```diff
@@ -1,5 +1,6 @@
 ### packages for visualization
 from analysis.rdkit_functions import compute_molecular_metrics
+from analysis.rdkit_functions import compute_graph_metrics
 from mini_moses.metrics.metrics import compute_intermediate_statistics
 from metrics.property_metric import TaskModel
 
@@ -23,7 +24,121 @@ def result_to_csv(path, dict_data):
             writer.writeheader()
         writer.writerow(dict_data)
 
+class SamplingGraphMetrics(nn.Module):
+    def __init__(
+            self,
+            dataset_infos,
+            train_graphs,
+            reference_graphs,
+            n_jobs=1,
+            device="cpu",
+            batch_size=512,
+    ):
+        super().__init__()
+        self.task_name = dataset_infos.task
+        self.dataset_infos = dataset_infos
+        self.active_nodes = dataset_infos.active_nodes
+        self.train_graphs = train_graphs
+
+        self.stat_ref = None
+
+        self.compute_config = {
+            "n_jobs": n_jobs,
+            "device": device,
+            "batch_size": batch_size,
+        }
+
+        self.task_evaluator = {
+            'meta_taskname': dataset_infos.task,
+            # 'sas': None,
+            # 'scs': None
+        }
+
+        for cur_task in dataset_infos.task.split("-")[:]:
+            model_path = os.path.join(
+                dataset_infos.base_path, "data/evaluator", f"{cur_task}.joblib"
+            )
+            os.makedirs(os.path.dirname(model_path), exist_ok=True)
+            evaluator = TaskModel(model_path, cur_task)
+            self.task_evaluator[cur_task] = evaluator
+
+    def forward(self, graphs, targets, name, current_epoch, val_counter, test=False):
+        test = True
+        if isinstance(targets, list):
+            targets_cat = torch.cat(targets, dim=0)
+            targets_np = targets_cat.detach().cpu().numpy()
+        else:
+            targets_np = targets.detach().cpu().numpy()
+
+        unique_graphs, all_graphs, all_metrics, targets_log = compute_graph_metrics(
+            graphs,
+            targets_np,
+            self.train_graphs,
+            self.stat_ref,
+            self.dataset_infos,
+            self.task_evaluator,
+            self.compute_config,
+        )
+        print(f"all graphs: {all_graphs}")
+        print(f"all graphs[0]: {all_graphs[0]}")
+        tmp_graphs = all_graphs.copy()
+        str_graphs = []
+        for graph in tmp_graphs:
+            node_types = graph[0]
+            edge_types = graph[1]
+            node_str = " ".join([str(node) for node in node_types])
+            edge_str_list = []
+            for i in range(len(node_types)):
+                for j in range(len(node_types)):
+                    edge_str_list.append(str(edge_types[i][j]))
+                edge_str_list.append("\n")
+            edge_str = " ".join(edge_str_list)
+            str_graphs.append(f"nodes: {node_str} \n edges: \n{edge_str}")
+
+
+        if test:
+            file_name = "final_graphs.txt"
+            with open(file_name, "w") as fp:
+                all_tasks_name = list(self.task_evaluator.keys())
+                all_tasks_name = all_tasks_name.copy()
+                if 'meta_taskname' in all_tasks_name:
+                    all_tasks_name.remove('meta_taskname')
+
+                all_tasks_str = "graph, " + ", ".join([f"input_{task}" for task in all_tasks_name] + [f"output_{task}" for task in all_tasks_name])
+                fp.write(all_tasks_str + "\n")
+                for i, graph in enumerate(str_graphs):
+                    if targets_log is not None:
+                        all_result_str = f"{graph}, " + ", ".join([f"{targets_log['input_'+task][i]}" for task in all_tasks_name] + [f"{targets_log['output_'+task][i]}" for task in all_tasks_name])
+                        fp.write(all_result_str + "\n")
+                    else:
+                        fp.write("%s\n" % graph)
+                print("All graphs saved")
+        else:
+            result_path = os.path.join(os.getcwd(), f"graphs/{name}")
+            os.makedirs(result_path, exist_ok=True)
+            text_path = os.path.join(
+                result_path,
+                f"valid_unique_graphs_e{current_epoch}_b{val_counter}.txt",
+            )
+            textfile = open(text_path, "w")
+            for graph in unique_graphs:
+                textfile.write(graph + "\n")
+            textfile.close()
+
+        all_logs = all_metrics
+        if test:
+            all_logs["log_name"] = "test"
+        else:
+            all_logs["log_name"] = (
+                "epoch" + str(current_epoch) + "_batch" + str(val_counter)
+            )
+
+        result_to_csv("output.csv", all_logs)
+        return str_graphs
+
+    def reset(self):
+        pass
+
 class SamplingMolecularMetrics(nn.Module):
     def __init__(
         self,
@@ -40,21 +155,21 @@ class SamplingMolecularMetrics(nn.Module):
         self.active_atoms = dataset_infos.active_atoms
         self.train_smiles = train_smiles
 
-        if reference_smiles is not None:
-            print(
-                f"--- Computing intermediate statistics for training for #{len(reference_smiles)} smiles ---"
-            )
-            start_time = time.time()
-            self.stat_ref = compute_intermediate_statistics(
-                reference_smiles, n_jobs=n_jobs, device=device, batch_size=batch_size
-            )
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            print(
-                f"--- End computing intermediate statistics: using {elapsed_time:.2f}s ---"
-            )
-        else:
-            self.stat_ref = None
+        # if reference_smiles is not None:
+        #     print(
+        #         f"--- Computing intermediate statistics for training for #{len(reference_smiles)} smiles ---"
+        #     )
+        #     start_time = time.time()
+        #     self.stat_ref = compute_intermediate_statistics(
+        #         reference_smiles, n_jobs=n_jobs, device=device, batch_size=batch_size
+        #     )
+        #     end_time = time.time()
+        #     elapsed_time = end_time - start_time
+        #     print(
+        #         f"--- End computing intermediate statistics: using {elapsed_time:.2f}s ---"
+        #     )
+        # else:
+        self.stat_ref = None
 
         self.comput_config = {
             "n_jobs": n_jobs,
```

(The serialization code originally appended a literal `"/n"` as its row separator; that reads like a typo for the newline escape `"\n"`, and it is rendered as `"\n"` above.)
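`SamplingGraphMetrics.forward` flattens each sampled graph to text: node codes on one line, then the adjacency matrix row by row. A sketch of that serialization with toy values (not real samples):

```python
node_types = [7, 2, 5]  # e.g. input, nor_conv_3x3, output as op_type codes
edge_types = [[0, 1, 0],
              [0, 0, 1],
              [0, 0, 0]]

edge_str_list = []
for i in range(len(node_types)):
    for j in range(len(node_types)):
        edge_str_list.append(str(edge_types[i][j]))
    edge_str_list.append("\n")  # one adjacency row per line
edge_str = " ".join(edge_str_list)
node_str = " ".join(str(n) for n in node_types)
print(f"nodes: {node_str} \n edges: \n{edge_str}")
```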
```diff
@@ -35,7 +35,13 @@ class CEPerClass(Metric):
 
     def compute(self):
         return self.total_ce / self.total_samples
+class NodeCE(CEPerClass):
+    def __init__(self, i):
+        super().__init__(i)
+
+class EdgeCE(CEPerClass):
+    def __init__(self, i):
+        super().__init__(i)
 
 class AtomCE(CEPerClass):
     def __init__(self, i):
@@ -65,6 +71,21 @@ class AromaticCE(CEPerClass):
     def __init__(self, i):
         super().__init__(i)
 
+class NodeMetricsCE(MetricCollection):
+    def __init__(self, active_nodes):
+        metrics_list = []
+
+        for i, node_type in enumerate(active_nodes):
+            metrics_list.append(type(f'{node_type}_CE', (NodeCE,), {})(i))
+        super().__init__(metrics_list)
+
+class EdgeMetricsCE(MetricCollection):
+    def __init__(self):
+        ce_no_bond = NoBondCE(0)
+        ce_SI = SingleCE(1)
+        ce_DO = DoubleCE(2)
+        ce_TR = TripleCE(3)
+        super().__init__([ce_no_bond, ce_SI])
+
 class AtomMetricsCE(MetricCollection):
     def __init__(self, active_atoms):
@@ -84,7 +105,47 @@ class BondMetricsCE(MetricCollection):
         ce_TR = TripleCE(3)
         super().__init__([ce_no_bond, ce_SI, ce_DO, ce_TR])
 
-#
+#
 
+class TrainGraphMetricsDiscrete(nn.Module):
+    def __init__(self, dataset_infos):
+        super().__init__()
+        active_nodes = dataset_infos.active_nodes
+        self.train_node_metrics = NodeMetricsCE(active_nodes=active_nodes)
+        self.train_edge_metrics = EdgeMetricsCE()
+
+    def forward(self, masked_pred_X, masked_pred_E, true_X, true_E, log: bool):
+        self.train_node_metrics(masked_pred_X, true_X)
+        self.train_edge_metrics(masked_pred_E, true_E)
+        if log:
+            to_log = {}
+            for key, val in self.train_node_metrics.compute().items():
+                to_log['train/' + key] = val.item()
+            for key, val in self.train_edge_metrics.compute().items():
+                to_log['train/' + key] = val.item()
+
+    def reset(self):
+        for metric in [self.train_node_metrics, self.train_edge_metrics]:
+            metric.reset()
+
+    def log_epoch_metrics(self, current_epoch, log=True):
+        epoch_node_metrics = self.train_node_metrics.compute()
+        epoch_edge_metrics = self.train_edge_metrics.compute()
+
+        to_log = {}
+        for key, val in epoch_node_metrics.items():
+            to_log['train_epoch/' + key] = val.item()
+        for key, val in epoch_edge_metrics.items():
+            to_log['train_epoch/' + key] = val.item()
+
+        for key, val in epoch_node_metrics.items():
+            epoch_node_metrics[key] = round(val.item(), 4)
+        for key, val in epoch_edge_metrics.items():
+            epoch_edge_metrics[key] = round(val.item(), 4)
+
+        if log:
+            print(f"Epoch {current_epoch}: {epoch_node_metrics} -- {epoch_edge_metrics}")
+
 class TrainMolecularMetricsDiscrete(nn.Module):
     def __init__(self, dataset_infos):
         super().__init__()
```
| @@ -15,6 +15,17 @@ from rdkit.Chem import AllChem | ||||
| from rdkit import DataStructs | ||||
| from rdkit.Chem import rdMolDescriptors | ||||
| rdBase.DisableLog('rdApp.error') | ||||
| import json | ||||
|  | ||||
| op_type = { | ||||
|     'nor_conv_1x1': 1, | ||||
|     'nor_conv_3x3': 2, | ||||
|     'avg_pool_3x3': 3, | ||||
|     'skip_connect': 4, | ||||
|     'output': 5, | ||||
|     'none': 6, | ||||
|     'input': 7 | ||||
| } | ||||
|  | ||||
| task_to_colname = { | ||||
|     'hiv_b': 'HIV_active', | ||||
| @@ -32,8 +43,10 @@ tasktype_name = { | ||||
|     'O2': 'regression', | ||||
|     'N2': 'regression', | ||||
|     'CO2': 'regression', | ||||
|     'nasbench201': 'regression', | ||||
| } | ||||
|  | ||||
|  | ||||
| class TaskModel(): | ||||
|     """Scores based on an ECFP classifier.""" | ||||
|     def __init__(self, model_path, task_name): | ||||
| @@ -55,8 +68,48 @@ class TaskModel(): | ||||
|             performance = self.train() | ||||
|             dump(self.model, model_path) | ||||
|             print('Oracle performance: ', performance) | ||||
|  | ||||
|     def train(self): | ||||
|         def read_adj_ops_from_json(filename): | ||||
|             with open(filename, 'r') as json_file: | ||||
|                 data = json.load(json_file) | ||||
|  | ||||
|             adj_ops_pairs = [] | ||||
|             for item in data: | ||||
|                 adj_matrix = np.array(item['adj_matrix']) | ||||
|                 ops = item['ops'] | ||||
|                 acc = item['train'][0]['accuracy'] | ||||
|                 adj_ops_pairs.append((adj_matrix, ops, acc)) | ||||
|              | ||||
|             return adj_ops_pairs | ||||
|         def feature_from_adj_and_ops(adj, ops): | ||||
|             return np.concatenate([adj.flatten(), ops]) | ||||
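|         # feature vector: flattened adjacency concatenated with integer op codes; label: train accuracy from the JSON | ||||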
|         filename = '/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json' | ||||
|         graphs = read_adj_ops_from_json(filename) | ||||
|         adjs = [] | ||||
|         opss = [] | ||||
|         accs = [] | ||||
|         features = [] | ||||
|         for graph in graphs: | ||||
|             adj, ops, acc = graph | ||||
|             op_code = [op_type[op] for op in ops] | ||||
|             adjs.append(adj) | ||||
|             opss.append(op_code) | ||||
|             accs.append(acc) | ||||
|             features.append(feature_from_adj_and_ops(adj, op_code)) | ||||
|         features = np.array(features) | ||||
|         labels = np.array(accs) | ||||
|  | ||||
|         mask = ~np.isnan(labels) | ||||
|         labels = labels[mask] | ||||
|         features = features[mask] | ||||
|         self.model.fit(features, labels) | ||||
|         y_pred = self.model.predict(features) | ||||
|         perf = self.metric_func(labels, y_pred) | ||||
|         print(f'{self.task_name} performance: {perf}') | ||||
|         return perf | ||||
|  | ||||
|     def train__(self): | ||||
|         data_path = os.path.dirname(self.model_path) | ||||
|         data_path = os.path.join(os.path.dirname(self.model_path), '..', f'raw/{self.task_name}.csv.gz') | ||||
|         df = pd.read_csv(data_path) | ||||
| @@ -84,7 +137,7 @@ class TaskModel(): | ||||
|         print(f'{self.task_name} performance: {perf}') | ||||
|         return perf | ||||
|  | ||||
|     def __call__(self, smiles_list): | ||||
|     def __call(self, smiles_list):  # disabled SMILES scorer, superseded by the graph __call__ below | ||||
|         fps = [] | ||||
|         mask = [] | ||||
|         for i,smiles in enumerate(smiles_list): | ||||
| @@ -101,6 +154,54 @@ class TaskModel(): | ||||
|         scores = scores * np.array(mask) | ||||
|         return np.float32(scores) | ||||
|  | ||||
|     def __call__(self, graph_list): | ||||
|         # Re-reading the NAS-Bench-201 JSON is only needed at training time | ||||
|         # (see train() above); at scoring time the graphs are passed in directly. | ||||
|         def feature_from_adj_and_ops(ops, adj): | ||||
|             return np.concatenate([adj.flatten(), ops]) | ||||
|         features = [] | ||||
|         print(f"graphlist: {graph_list[0]}") | ||||
|         print(f"len graphlist: {len(graph_list)}")  | ||||
|         for op_code, adj in graph_list: | ||||
|             features.append(feature_from_adj_and_ops(op_code, adj)) | ||||
|         print(f"len features: {len(features)}") | ||||
|         # print(f"features: {features[0].shape}") | ||||
|         features = np.stack(features) | ||||
|         features = features.astype(np.float32) | ||||
|         print(f"features shape: {features.shape}") | ||||
|  | ||||
|  | ||||
|         fps = features | ||||
|         if 'classification' in self.task_type: | ||||
|             scores = self.model.predict_proba(fps)[:, 1] | ||||
|         else: | ||||
|             scores = self.model.predict(fps) | ||||
|         # scores = scores * np.array(mask) | ||||
|         return np.float32(scores) | ||||
|  | ||||
|  | ||||
|     @classmethod | ||||
|     def fingerprints_from_mol(cls, mol):  # use ECFP4 | ||||
|         features_vec = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048) | ||||
|   | ||||
| @@ -87,7 +87,7 @@ class Denoiser(nn.Module): | ||||
|     def forward(self, x, e, node_mask, y, t, unconditioned): | ||||
|          | ||||
|         print("Denoiser Forward") | ||||
|         print(x.shape, e.shape, y.shape, t.shape, unconditioned) | ||||
|         # print(x.shape, e.shape, y.shape, t.shape, unconditioned) | ||||
|         force_drop_id = torch.zeros_like(y.sum(-1)) | ||||
|         # drop the nan values | ||||
|         force_drop_id[torch.isnan(y.sum(-1))] = 1 | ||||
| @@ -98,32 +98,32 @@ class Denoiser(nn.Module): | ||||
|         # bs = batch size, n = number of nodes | ||||
|         bs, n, _ = x.size() | ||||
|         x = torch.cat([x, e.reshape(bs, n, -1)], dim=-1) | ||||
|         print("X after concat with E") | ||||
|         print(x.shape) | ||||
|         # print("X after concat with E") | ||||
|         # print(x.shape) | ||||
|         # self.x_embedder = nn.Linear(Xdim + max_n_nodes * Edim, hidden_size, bias=False) | ||||
|         x = self.x_embedder(x) | ||||
|         print("X after x_embedder") | ||||
|         print(x.shape) | ||||
|         # print("X after x_embedder") | ||||
|         # print(x.shape) | ||||
|  | ||||
|         # self.t_embedder = TimestepEmbedder(hidden_size) | ||||
|         c1 = self.t_embedder(t) | ||||
|         print("C1 after t_embedder") | ||||
|         print(c1.shape) | ||||
|         # print("C1 after t_embedder") | ||||
|         # print(c1.shape) | ||||
|         for i in range(1, self.ydim): | ||||
|             if i == 1: | ||||
|                 c2 = self.y_embedding_list[i-1](y[:, :2], self.training, force_drop_id, t) | ||||
|             else: | ||||
|                 c2 = c2 + self.y_embedding_list[i-1](y[:, i:i+1], self.training, force_drop_id, t) | ||||
|         print("C2 after y_embedding_list") | ||||
|         print(c2.shape) | ||||
|         print("C1 + C2") | ||||
|         # print("C2 after y_embedding_list") | ||||
|         # print(c2.shape) | ||||
|         # print("C1 + C2") | ||||
|         c = c1 + c2 | ||||
|         print(c.shape) | ||||
|         # print(c.shape) | ||||
|          | ||||
|         for i, block in enumerate(self.encoders): | ||||
|             x = block(x, c, node_mask) | ||||
|         print("X after block") | ||||
|         print(x.shape) | ||||
|         # print("X after block") | ||||
|         # print(x.shape) | ||||
|  | ||||
|         # X: B * N * dx, E: B * N * N * de | ||||
|         X, E, y = self.out_layer(x, x_in, e_in, c, t, node_mask) | ||||
|   | ||||
| @@ -46,13 +46,17 @@ def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False): | ||||
|  | ||||
|  | ||||
| def to_dense(x, edge_index, edge_attr, batch, max_num_nodes=None): | ||||
|     # print(f"to dense X: {x.shape}, edge_index: {edge_index.shape}, edge_attr: {edge_attr.shape}, batch: {batch}, max_num_nodes: {max_num_nodes}") | ||||
|     X, node_mask = to_dense_batch(x=x, batch=batch, max_num_nodes=max_num_nodes) | ||||
|     # node_mask = node_mask.float() | ||||
|     edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr) | ||||
|     if max_num_nodes is None: | ||||
|         max_num_nodes = X.size(1) | ||||
|     # print(f"to dense X: {X.shape}, edge_index: {edge_index.shape}, edge_attr: {edge_attr.shape}, batch: {batch}, max_num_nodes: {max_num_nodes}") | ||||
|     E = to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes) | ||||
|     E = encode_no_edge(E) | ||||
|     # print(f"to dense X: {X.shape}, edge_index: {edge_index.shape}, edge_attr: {edge_attr.shape}, batch: {batch}, max_num_nodes: {max_num_nodes}") | ||||
|     # print(f"to dense X: {X.shape}, E: {E.shape}, batch: {batch}, lenE: {len(E)}") | ||||
|     return PlaceHolder(X=X, E=E, y=None), node_mask | ||||
|  | ||||
|  | ||||
| @@ -119,6 +123,7 @@ class PlaceHolder: | ||||
|         x_mask = node_mask.unsqueeze(-1)          # bs, n, 1 | ||||
|         e_mask1 = x_mask.unsqueeze(2)             # bs, n, 1, 1 | ||||
|         e_mask2 = x_mask.unsqueeze(1)             # bs, 1, n, 1 | ||||
|         # print(f"mask X: {self.X.shape}, E: {self.E.shape}, node_mask: {node_mask.shape}, x_mask: {x_mask.shape}, e_mask1: {e_mask1.shape}, e_mask2: {e_mask2.shape}") | ||||
|  | ||||
|         if collapse: | ||||
|             self.X = torch.argmax(self.X, dim=-1) | ||||
| @@ -127,8 +132,13 @@ class PlaceHolder: | ||||
|             self.X[node_mask == 0] = - 1 | ||||
|             self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1 | ||||
|         else: | ||||
|             # print(f"X: {self.X.shape}, E: {self.E.shape}") | ||||
|             # print(f"X: {self.X}, E: {self.E}") | ||||
|             # print(f"x_mask: {x_mask}, e_mask1: {e_mask1}, e_mask2: {e_mask2}") | ||||
|             self.X = self.X * x_mask | ||||
|             self.E = self.E * e_mask1 * e_mask2 | ||||
|             # print(f"X: {self.X.shape}, E: {self.E.shape}") | ||||
|             # print(f"X: {self.X}, E: {self.E}") | ||||
|             assert torch.allclose(self.E, torch.transpose(self.E, 1, 2)) | ||||
|         return self | ||||
|  | ||||
|   | ||||
							
								
								
									
graph_dit/workingdoc.md (new file, 374 lines)
							| @@ -0,0 +1,374 @@ | ||||
| 20240626 | ||||
| **dataset.py** | ||||
| ## class DataModule: | ||||
|  | ||||
| ### def prepare_data(self) -> None: | ||||
|  | ||||
|     dataset = Dataset(source=source, root=root_path, target_prop=target, transform=None) | ||||
|  | ||||
| ## class Dataset: | ||||
|  | ||||
| Dataset is the subclass of `InMemoryDataset` from `torch_geometric.data` module. | ||||
|  | ||||
|  | ||||
| ```python | ||||
| super().__init__(root, transform, pre_transform) | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ### def __init__(self, source, root, target_prop=None, transform=None, pre_transform=None, pre_filter=None): | ||||
|  | ||||
| Initialize the NAS-Bench API and load the dataset. | ||||
|  | ||||
| `self.data` and `self.slices` are built from the NAS-Bench API in `process()` and then loaded from the processed file. | ||||
|  | ||||
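| A minimal sketch of that loading path, following the standard `InMemoryDataset` pattern (the NAS-Bench API setup is omitted here): | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
| from torch_geometric.data import InMemoryDataset | ||||
|  | ||||
| class Dataset(InMemoryDataset): | ||||
|     def __init__(self, source, root, target_prop=None, | ||||
|                  transform=None, pre_transform=None, pre_filter=None): | ||||
|         self.source = source | ||||
|         self.target_prop = target_prop | ||||
|         super().__init__(root, transform, pre_transform) | ||||
|         # processed_paths[0] holds the (data, slices) pair saved by process() | ||||
|         self.data, self.slices = torch.load(self.processed_paths[0]) | ||||
| ``` | ||||
|  | ||||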
| Printing `self.data` and `self.slices` gives: | ||||
|  | ||||
| ```text | ||||
| self.data=Data(x=[125000], edge_index=[2, 93750], edge_attr=[93750, 1], y=[15625, 5]),  | ||||
| self.slices=defaultdict(<class 'dict'>, {'x': tensor([     0,      8,     16,  ..., 124984, 124992, 125000]),  | ||||
| 'edge_index': tensor([    0,     6,    12,  ..., 93738, 93744, 93750]),  | ||||
| 'edge_attr': tensor([    0,     6,    12,  ..., 93738, 93744, 93750]),  | ||||
| 'y': tensor([    0,     1,     2,  ..., 15623, 15624, 15625])}) | ||||
| ``` | ||||
|  | ||||
| For comparison, the original DiT (molecular) dataset gives: | ||||
|  | ||||
| <details> | ||||
| <summary>click to expand</summary> | ||||
|  | ||||
| ```text | ||||
| self.data: Data(x=[16599], edge_index=[2, 36132], edge_attr=[36132], y=[553, 5], idx=[553]), | ||||
| self.slices: defaultdict(<class 'dict'>, {'x': tensor([    0,     9,    32,    56,    78,   113,   138,   157,   173,   203, | ||||
|           219,   262,   299,   326,   350,   391,   432,   466,   502,   528, | ||||
|           547,   583,   589,   599,   609,   622,   637,   652,   664,   675, | ||||
|           687,   703,   715,   719,   729,   735,   742,   749,   754,   770, | ||||
|           799,   836,   840,   846,   878,   917,   954,   991,  1022,  1065, | ||||
|          1096,  1127,  1158,  1197,  1228,  1259,  1298,  1343,  1381,  1412, | ||||
|          1457,  1500,  1539,  1583,  1601,  1640,  1679,  1688,  1733,  1768, | ||||
|          1797,  1830,  1870,  1901,  1926,  1950,  1971,  2000,  2043,  2090, | ||||
|          2137,  2176,  2220,  2261,  2301,  2337,  2378,  2420,  2460,  2508, | ||||
|          2549,  2580,  2615,  2644,  2673,  2717,  2748,  2780,  2809,  2833, | ||||
|          2865,  2875,  2884,  2923,  2966,  3006,  3017,  3028,  3048,  3079, | ||||
|          3090,  3101,  3110,  3115,  3129,  3155,  3173,  3194,  3224,  3256, | ||||
|          3299,  3336,  3379,  3426,  3470,  3514,  3542,  3566,  3587,  3615, | ||||
|          3636,  3664,  3684,  3698,  3728,  3754,  3780,  3796,  3808,  3825, | ||||
|          3853,  3881,  3906,  3924,  3949,  3982,  4007,  4044,  4092,  4128, | ||||
|          4174,  4209,  4244,  4292,  4339,  4372,  4419,  4468,  4502,  4545, | ||||
|          4588,  4609,  4638,  4688,  4717,  4767,  4814,  4861,  4894,  4935, | ||||
|          4985,  5023,  5057,  5080,  5114,  5148,  5156,  5202,  5251,  5299, | ||||
|          5348,  5367,  5376,  5386,  5400,  5413,  5429,  5448,  5474,  5485, | ||||
|          5497,  5513,  5519,  5558,  5592,  5631,  5656,  5679,  5705,  5734, | ||||
|          5763,  5788,  5813,  5838,  5863,  5887,  5911,  5935,  5959,  5982, | ||||
|          6027,  6072,  6097,  6133,  6179,  6211,  6250,  6297,  6338,  6370, | ||||
|          6411,  6458,  6493,  6528,  6576,  6615,  6654,  6694,  6733,  6760, | ||||
|          6784,  6816,  6861,  6907,  6944,  6982,  7027,  7073,  7117,  7160, | ||||
|          7203,  7243,  7283,  7323,  7360,  7407,  7421,  7457,  7502,  7543, | ||||
|          7563,  7585,  7605,  7624,  7665,  7706,  7717,  7754,  7791,  7815, | ||||
|          7839,  7863,  7908,  7952,  7984,  8010,  8035,  8073,  8117,  8143, | ||||
|          8189,  8223,  8271,  8306,  8353,  8378,  8415,  8449,  8483,  8520, | ||||
|          8558,  8605,  8636,  8680,  8727,  8762,  8809,  8823,  8854,  8879, | ||||
|          8915,  8954,  8997,  9039,  9078,  9120,  9153,  9195,  9237,  9272, | ||||
|          9314,  9356,  9389,  9400,  9449,  9497,  9536,  9571,  9621,  9662, | ||||
|          9698,  9740,  9783,  9825,  9871,  9915,  9965, 10006, 10050, 10097, | ||||
|         10138, 10188, 10232, 10256, 10267, 10287, 10301, 10344, 10358, 10373, | ||||
|         10387, 10429, 10441, 10472, 10480, 10500, 10510, 10521, 10533, 10552, | ||||
|         10583, 10604, 10621, 10635, 10647, 10660, 10676, 10697, 10728, 10758, | ||||
|         10789, 10799, 10813, 10821, 10833, 10850, 10858, 10869, 10917, 10926, | ||||
|         10936, 10948, 10972, 10992, 11011, 11024, 11040, 11059, 11065, 11072, | ||||
|         11082, 11099, 11107, 11115, 11124, 11135, 11147, 11167, 11178, 11184, | ||||
|         11198, 11209, 11220, 11226, 11242, 11248, 11261, 11269, 11288, 11308, | ||||
|         11335, 11363, 11392, 11415, 11435, 11449, 11475, 11518, 11537, 11550, | ||||
|         11565, 11578, 11599, 11614, 11631, 11674, 11717, 11738, 11771, 11811, | ||||
|         11831, 11846, 11855, 11869, 11877, 11884, 11889, 11895, 11917, 11956, | ||||
|         11968, 11979, 12018, 12029, 12036, 12041, 12051, 12061, 12067, 12086, | ||||
|         12109, 12134, 12165, 12191, 12232, 12273, 12307, 12354, 12385, 12433, | ||||
|         12472, 12518, 12565, 12611, 12657, 12703, 12742, 12788, 12836, 12882, | ||||
|         12928, 12967, 13013, 13052, 13091, 13130, 13173, 13214, 13264, 13314, | ||||
|         13364, 13403, 13450, 13497, 13544, 13589, 13634, 13677, 13717, 13752, | ||||
|         13793, 13828, 13855, 13890, 13925, 13965, 14011, 14057, 14105, 14151, | ||||
|         14190, 14238, 14285, 14313, 14343, 14362, 14405, 14443, 14475, 14509, | ||||
|         14537, 14553, 14597, 14636, 14647, 14658, 14705, 14736, 14786, 14810, | ||||
|         14830, 14849, 14869, 14910, 14950, 14956, 14994, 15042, 15077, 15125, | ||||
|         15172, 15196, 15204, 15236, 15268, 15311, 15350, 15393, 15433, 15466, | ||||
|         15499, 15530, 15565, 15600, 15639, 15675, 15711, 15750, 15782, 15814, | ||||
|         15849, 15882, 15921, 15956, 15999, 16038, 16078, 16118, 16155, 16182, | ||||
|         16209, 16235, 16261, 16290, 16319, 16345, 16370, 16415, 16460, 16503, | ||||
|         16530, 16539, 16585, 16599]),  | ||||
|         'edge_index': tensor([    0,    18,    70,   124,   172,   250,   308,   350,   384,   454, | ||||
|           488,   590,   674,   736,   790,   882,   976,  1050,  1134,  1194, | ||||
|          1236,  1318,  1328,  1348,  1368,  1394,  1428,  1462,  1488,  1510, | ||||
|          1534,  1570,  1594,  1600,  1620,  1630,  1642,  1654,  1662,  1694, | ||||
|          1762,  1850,  1856,  1866,  1938,  2026,  2110,  2194,  2264,  2362, | ||||
|          2432,  2502,  2572,  2662,  2732,  2802,  2890,  2992,  3078,  3148, | ||||
|          3250,  3348,  3436,  3536,  3574,  3662,  3750,  3768,  3870,  3946, | ||||
|          4008,  4080,  4168,  4238,  4294,  4346,  4392,  4456,  4556,  4664, | ||||
|          4768,  4856,  4956,  5050,  5140,  5220,  5312,  5406,  5496,  5604, | ||||
|          5698,  5766,  5844,  5908,  5972,  6072,  6140,  6210,  6274,  6326, | ||||
|          6396,  6414,  6432,  6520,  6616,  6704,  6726,  6748,  6790,  6850, | ||||
|          6872,  6894,  6910,  6918,  6946,  6998,  7034,  7076,  7144,  7212, | ||||
|          7308,  7386,  7476,  7580,  7676,  7772,  7832,  7882,  7926,  7984, | ||||
|          8028,  8086,  8128,  8156,  8220,  8276,  8330,  8360,  8382,  8416, | ||||
|          8474,  8532,  8584,  8620,  8672,  8742,  8794,  8878,  8984,  9062, | ||||
|          9164,  9240,  9320,  9426,  9532,  9606,  9710,  9818,  9892,  9986, | ||||
|         10080, 10124, 10186, 10298, 10360, 10472, 10578, 10684, 10756, 10846, | ||||
|         10956, 11040, 11114, 11162, 11236, 11310, 11324, 11424, 11532, 11638, | ||||
|         11744, 11784, 11800, 11818, 11844, 11868, 11898, 11934, 11988, 12008, | ||||
|         12030, 12060, 12070, 12156, 12230, 12316, 12370, 12416, 12474, 12538, | ||||
|         12602, 12658, 12712, 12766, 12820, 12872, 12924, 12976, 13028, 13078, | ||||
|         13180, 13282, 13338, 13418, 13518, 13588, 13674, 13776, 13866, 13936, | ||||
|         14028, 14136, 14214, 14292, 14400, 14488, 14576, 14666, 14754, 14814, | ||||
|         14866, 14940, 15038, 15140, 15224, 15310, 15410, 15512, 15610, 15708, | ||||
|         15802, 15890, 15978, 16066, 16144, 16248, 16276, 16354, 16454, 16548, | ||||
|         16590, 16636, 16678, 16718, 16808, 16898, 16920, 17000, 17080, 17132, | ||||
|         17184, 17236, 17336, 17434, 17504, 17560, 17612, 17694, 17788, 17844, | ||||
|         17948, 18022, 18128, 18204, 18306, 18358, 18436, 18508, 18580, 18660, | ||||
|         18742, 18844, 18908, 19002, 19106, 19182, 19286, 19314, 19382, 19436, | ||||
|         19514, 19600, 19696, 19790, 19876, 19968, 20042, 20134, 20226, 20304, | ||||
|         20396, 20488, 20562, 20584, 20696, 20802, 20890, 20968, 21078, 21170, | ||||
|         21248, 21342, 21438, 21536, 21644, 21748, 21860, 21954, 22054, 22162, | ||||
|         22254, 22366, 22464, 22516, 22538, 22580, 22608, 22704, 22732, 22762, | ||||
|         22788, 22882, 22906, 22976, 22990, 23032, 23050, 23070, 23092, 23130, | ||||
|         23192, 23232, 23264, 23290, 23312, 23336, 23366, 23408, 23472, 23534, | ||||
|         23598, 23616, 23642, 23656, 23678, 23712, 23726, 23746, 23854, 23870, | ||||
|         23888, 23912, 23960, 24000, 24038, 24062, 24092, 24128, 24138, 24150, | ||||
|         24168, 24202, 24218, 24232, 24248, 24270, 24294, 24332, 24354, 24364, | ||||
|         24392, 24412, 24434, 24444, 24476, 24486, 24512, 24526, 24564, 24606, | ||||
|         24666, 24728, 24794, 24844, 24886, 24914, 24970, 25062, 25104, 25130, | ||||
|         25164, 25192, 25236, 25266, 25300, 25388, 25476, 25520, 25594, 25684, | ||||
|         25724, 25754, 25770, 25798, 25812, 25824, 25832, 25842, 25888, 25976, | ||||
|         26000, 26022, 26110, 26132, 26144, 26152, 26170, 26188, 26198, 26236, | ||||
|         26284, 26338, 26406, 26462, 26552, 26644, 26718, 26822, 26886, 26992, | ||||
|         27078, 27182, 27288, 27390, 27492, 27594, 27680, 27782, 27890, 27992, | ||||
|         28094, 28180, 28282, 28368, 28454, 28542, 28638, 28730, 28840, 28950, | ||||
|         29060, 29146, 29250, 29354, 29458, 29558, 29658, 29752, 29838, 29912, | ||||
|         30000, 30074, 30130, 30204, 30278, 30364, 30468, 30570, 30676, 30778, | ||||
|         30864, 30972, 31076, 31136, 31194, 31232, 31326, 31408, 31476, 31550, | ||||
|         31610, 31640, 31736, 31824, 31846, 31870, 31974, 32042, 32148, 32204, | ||||
|         32248, 32290, 32334, 32424, 32512, 32522, 32608, 32714, 32790, 32900, | ||||
|         33008, 33058, 33072, 33142, 33212, 33312, 33394, 33490, 33578, 33648, | ||||
|         33718, 33784, 33858, 33932, 34020, 34100, 34180, 34262, 34330, 34398, | ||||
|         34472, 34542, 34624, 34698, 34794, 34882, 34970, 35058, 35140, 35200, | ||||
|         35260, 35318, 35376, 35440, 35504, 35562, 35618, 35722, 35826, 35926, | ||||
|         35982, 36000, 36104, 36132]),  | ||||
|         'edge_attr': tensor([    0,    18,    70,   124,   172,   250,   308,   350,   384,   454, | ||||
|           488,   590,   674,   736,   790,   882,   976,  1050,  1134,  1194, | ||||
|          1236,  1318,  1328,  1348,  1368,  1394,  1428,  1462,  1488,  1510, | ||||
|          1534,  1570,  1594,  1600,  1620,  1630,  1642,  1654,  1662,  1694, | ||||
|          1762,  1850,  1856,  1866,  1938,  2026,  2110,  2194,  2264,  2362, | ||||
|          2432,  2502,  2572,  2662,  2732,  2802,  2890,  2992,  3078,  3148, | ||||
|          3250,  3348,  3436,  3536,  3574,  3662,  3750,  3768,  3870,  3946, | ||||
|          4008,  4080,  4168,  4238,  4294,  4346,  4392,  4456,  4556,  4664, | ||||
|          4768,  4856,  4956,  5050,  5140,  5220,  5312,  5406,  5496,  5604, | ||||
|          5698,  5766,  5844,  5908,  5972,  6072,  6140,  6210,  6274,  6326, | ||||
|          6396,  6414,  6432,  6520,  6616,  6704,  6726,  6748,  6790,  6850, | ||||
|          6872,  6894,  6910,  6918,  6946,  6998,  7034,  7076,  7144,  7212, | ||||
|          7308,  7386,  7476,  7580,  7676,  7772,  7832,  7882,  7926,  7984, | ||||
|          8028,  8086,  8128,  8156,  8220,  8276,  8330,  8360,  8382,  8416, | ||||
|          8474,  8532,  8584,  8620,  8672,  8742,  8794,  8878,  8984,  9062, | ||||
|          9164,  9240,  9320,  9426,  9532,  9606,  9710,  9818,  9892,  9986, | ||||
|         10080, 10124, 10186, 10298, 10360, 10472, 10578, 10684, 10756, 10846, | ||||
|         10956, 11040, 11114, 11162, 11236, 11310, 11324, 11424, 11532, 11638, | ||||
|         11744, 11784, 11800, 11818, 11844, 11868, 11898, 11934, 11988, 12008, | ||||
|         12030, 12060, 12070, 12156, 12230, 12316, 12370, 12416, 12474, 12538, | ||||
|         12602, 12658, 12712, 12766, 12820, 12872, 12924, 12976, 13028, 13078, | ||||
|         13180, 13282, 13338, 13418, 13518, 13588, 13674, 13776, 13866, 13936, | ||||
|         14028, 14136, 14214, 14292, 14400, 14488, 14576, 14666, 14754, 14814, | ||||
|         14866, 14940, 15038, 15140, 15224, 15310, 15410, 15512, 15610, 15708, | ||||
|         15802, 15890, 15978, 16066, 16144, 16248, 16276, 16354, 16454, 16548, | ||||
|         16590, 16636, 16678, 16718, 16808, 16898, 16920, 17000, 17080, 17132, | ||||
|         17184, 17236, 17336, 17434, 17504, 17560, 17612, 17694, 17788, 17844, | ||||
|         17948, 18022, 18128, 18204, 18306, 18358, 18436, 18508, 18580, 18660, | ||||
|         18742, 18844, 18908, 19002, 19106, 19182, 19286, 19314, 19382, 19436, | ||||
|         19514, 19600, 19696, 19790, 19876, 19968, 20042, 20134, 20226, 20304, | ||||
|         20396, 20488, 20562, 20584, 20696, 20802, 20890, 20968, 21078, 21170, | ||||
|         21248, 21342, 21438, 21536, 21644, 21748, 21860, 21954, 22054, 22162, | ||||
|         22254, 22366, 22464, 22516, 22538, 22580, 22608, 22704, 22732, 22762, | ||||
|         22788, 22882, 22906, 22976, 22990, 23032, 23050, 23070, 23092, 23130, | ||||
|         23192, 23232, 23264, 23290, 23312, 23336, 23366, 23408, 23472, 23534, | ||||
|         23598, 23616, 23642, 23656, 23678, 23712, 23726, 23746, 23854, 23870, | ||||
|         23888, 23912, 23960, 24000, 24038, 24062, 24092, 24128, 24138, 24150, | ||||
|         24168, 24202, 24218, 24232, 24248, 24270, 24294, 24332, 24354, 24364, | ||||
|         24392, 24412, 24434, 24444, 24476, 24486, 24512, 24526, 24564, 24606, | ||||
|         24666, 24728, 24794, 24844, 24886, 24914, 24970, 25062, 25104, 25130, | ||||
|         25164, 25192, 25236, 25266, 25300, 25388, 25476, 25520, 25594, 25684, | ||||
|         25724, 25754, 25770, 25798, 25812, 25824, 25832, 25842, 25888, 25976, | ||||
|         26000, 26022, 26110, 26132, 26144, 26152, 26170, 26188, 26198, 26236, | ||||
|         26284, 26338, 26406, 26462, 26552, 26644, 26718, 26822, 26886, 26992, | ||||
|         27078, 27182, 27288, 27390, 27492, 27594, 27680, 27782, 27890, 27992, | ||||
|         28094, 28180, 28282, 28368, 28454, 28542, 28638, 28730, 28840, 28950, | ||||
|         29060, 29146, 29250, 29354, 29458, 29558, 29658, 29752, 29838, 29912, | ||||
|         30000, 30074, 30130, 30204, 30278, 30364, 30468, 30570, 30676, 30778, | ||||
|         30864, 30972, 31076, 31136, 31194, 31232, 31326, 31408, 31476, 31550, | ||||
|         31610, 31640, 31736, 31824, 31846, 31870, 31974, 32042, 32148, 32204, | ||||
|         32248, 32290, 32334, 32424, 32512, 32522, 32608, 32714, 32790, 32900, | ||||
|         33008, 33058, 33072, 33142, 33212, 33312, 33394, 33490, 33578, 33648, | ||||
|         33718, 33784, 33858, 33932, 34020, 34100, 34180, 34262, 34330, 34398, | ||||
|         34472, 34542, 34624, 34698, 34794, 34882, 34970, 35058, 35140, 35200, | ||||
|         35260, 35318, 35376, 35440, 35504, 35562, 35618, 35722, 35826, 35926, | ||||
|         35982, 36000, 36104, 36132]),  | ||||
|         'y': tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13, | ||||
|          14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27, | ||||
|          28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41, | ||||
|          42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55, | ||||
|          56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69, | ||||
|          70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83, | ||||
|          84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97, | ||||
|          98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, | ||||
|         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, | ||||
|         126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, | ||||
|         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, | ||||
|         154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, | ||||
|         168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, | ||||
|         182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, | ||||
|         196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, | ||||
|         210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, | ||||
|         224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, | ||||
|         238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, | ||||
|         252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, | ||||
|         266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, | ||||
|         280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, | ||||
|         294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, | ||||
|         308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, | ||||
|         322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, | ||||
|         336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, | ||||
|         350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, | ||||
|         364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, | ||||
|         378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, | ||||
|         392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, | ||||
|         406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, | ||||
|         420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, | ||||
|         434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, | ||||
|         448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, | ||||
|         462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, | ||||
|         476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, | ||||
|         490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, | ||||
|         504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, | ||||
|         518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, | ||||
|         532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, | ||||
|         546, 547, 548, 549, 550, 551, 552, 553]),  | ||||
|         'idx': tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13, | ||||
|          14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27, | ||||
|          28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41, | ||||
|          42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55, | ||||
|          56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69, | ||||
|          70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83, | ||||
|          84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97, | ||||
|          98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, | ||||
|         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, | ||||
|         126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, | ||||
|         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, | ||||
|         154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, | ||||
|         168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, | ||||
|         182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, | ||||
|         196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, | ||||
|         210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, | ||||
|         224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, | ||||
|         238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, | ||||
|         252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, | ||||
|         266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, | ||||
|         280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, | ||||
|         294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, | ||||
|         308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, | ||||
|         322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, | ||||
|         336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, | ||||
|         350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, | ||||
|         364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, | ||||
|         378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, | ||||
|         392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, | ||||
|         406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, | ||||
|         420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, | ||||
|         434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, | ||||
|         448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, | ||||
|         462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, | ||||
|         476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, | ||||
|         490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, | ||||
|         504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, | ||||
|         518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, | ||||
|         532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, | ||||
|         546, 547, 548, 549, 550, 551, 552, 553])}) | ||||
| ``` | ||||
| </details> | ||||
|  | ||||
| The differences between the two are: | ||||
| - `self.data` | ||||
|     - `edge_attr`: in nasbenchDiT it has shape `(num_edges, 1)`, while DiT expects shape `(num_edges,)`. | ||||
|     - `idx`: nasbenchDiT does not have this key. | ||||
| - `self.slices`: the same differences as in `self.data`. | ||||
|  | ||||
| So I modify `self.data` and `self.slices` to be compatible with the DiT dataset. | ||||
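|  | ||||
| A minimal sketch of that fix (assuming `data` and `slices` were just loaded; attribute names follow the dumps above): | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
|  | ||||
| def make_dit_compatible(data, slices): | ||||
|     # squeeze edge_attr from (num_edges, 1) to (num_edges,), as DiT expects | ||||
|     data.edge_attr = data.edge_attr.squeeze(-1) | ||||
|     # add the per-graph idx key that the DiT data carries | ||||
|     num_graphs = data.y.size(0) | ||||
|     data.idx = torch.arange(num_graphs) | ||||
|     slices['idx'] = torch.arange(num_graphs + 1) | ||||
|     return data, slices | ||||
| ``` | ||||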
|  | ||||
| ## DataInfos | ||||
|  | ||||
| `DataInfos` now needs a dataset instance at construction time, because the NAS-Bench API lives on the dataset: | ||||
| ```python | ||||
|         self.api = dataset.api | ||||
| ``` | ||||
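|  | ||||
| A sketch of the resulting constructor (the import path and extra arguments are assumptions): | ||||
|  | ||||
| ```python | ||||
| from datasets.abstract_dataset import AbstractDatasetInfos  # hypothetical import path | ||||
|  | ||||
| class DataInfos(AbstractDatasetInfos): | ||||
|     def __init__(self, datamodule, cfg, dataset): | ||||
|         self.datamodule = datamodule | ||||
|         self.api = dataset.api  # reuse the NAS-Bench-201 API held by the dataset | ||||
| ``` | ||||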
|  | ||||
| ## get_train_smiles() vs. get_train_graphs() | ||||
|  | ||||
| We want to remove molecules from the dataset, so we rewrite the get_train_smiles() function as a get_train_graphs() function; a sketch follows below. | ||||
|  | ||||
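| A plausible shape for the replacement (the (op_codes, adj) tuple layout mirrors what the graph scorer consumes; details are assumptions): | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
|  | ||||
| def get_train_graphs(dataset): | ||||
|     """Return (op_codes, adj_matrix) pairs instead of SMILES strings.""" | ||||
|     graphs = [] | ||||
|     for data in dataset:  # each item is a torch_geometric Data object | ||||
|         op_codes = data.x.tolist() | ||||
|         n = len(op_codes) | ||||
|         adj = torch.zeros(n, n, dtype=torch.long) | ||||
|         adj[data.edge_index[0], data.edge_index[1]] = 1 | ||||
|         graphs.append((op_codes, adj.numpy())) | ||||
|     return graphs | ||||
| ``` | ||||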
|  | ||||
| 20240628 | ||||
|  | ||||
| ## make the dataset ready for DiT | ||||
|  | ||||
| Changed the process function in the dataset class to be compatible with the DiT dataset. | ||||
|  | ||||
| <details> | ||||
| <summary>click to expand</summary> | ||||
|  | ||||
| ```python | ||||
|     def process(self): | ||||
|  | ||||
|         data_list = [] | ||||
|         len_data = len(self.api) | ||||
|  | ||||
|         def graph_to_graph_data(graph): | ||||
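|             # graph is an (adj_matrix, ops) pair; build a PyG Data object with op-code node labels and both edge directions | ||||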
|             ops = graph[1] | ||||
|             adj = graph[0] | ||||
|             nodes = [] | ||||
|             for op in ops: | ||||
|                 nodes.append(op_type[op]) | ||||
|             x = torch.LongTensor(nodes) | ||||
|  | ||||
|             edges_list = [] | ||||
|             edge_type = [] | ||||
|             for start in range(len(ops)): | ||||
|                 for end in range(len(ops)): | ||||
|                     if adj[start][end] == 1: | ||||
|                         edges_list.append((start, end)) | ||||
|                         edge_type.append(1) | ||||
|                         edges_list.append((end, start)) | ||||
|                         edge_type.append(1) | ||||
|              | ||||
|             edge_index = torch.tensor(edges_list, dtype=torch.long).t() | ||||
|             edge_type = torch.tensor(edge_type, dtype=torch.long) | ||||
|             edge_attr = edge_type | ||||
|             y = torch.tensor([0], dtype=torch.float).view(1, -1) | ||||
|             data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i) | ||||
|             return data | ||||
|         with tqdm(total = len_data) as pbar: | ||||
|             active_nodes = set() | ||||
|             for i in range(len_data): | ||||
|                 arch_info = self.api.query_meta_info_by_index(i) | ||||
|                 nodes, edges = parse_architecture_string(arch_info.arch_str) | ||||
|                 adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges) | ||||
|                 for op in ops: | ||||
|                     if op not in active_nodes: | ||||
|                         active_nodes.add(op) | ||||
|                 data = graph_to_graph_data((adj_matrix, ops))  | ||||
|                 data_list.append(data) | ||||
|                 pbar.update(1) | ||||
|          | ||||
|         torch.save(self.collate(data_list), self.processed_paths[0]) | ||||
| ``` | ||||
| </details> | ||||
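|  | ||||
| For context, a NAS-Bench-201 arch_str packs six operations between the input and output nodes. A toy extraction of the eight node labels (the example string is hypothetical; op_type is the mapping defined above): | ||||
|  | ||||
| ```python | ||||
| op_type = {'nor_conv_1x1': 1, 'nor_conv_3x3': 2, 'avg_pool_3x3': 3, | ||||
|            'skip_connect': 4, 'output': 5, 'none': 6, 'input': 7} | ||||
|  | ||||
| arch = '|nor_conv_3x3~0|+|skip_connect~0|nor_conv_1x1~1|+|none~0|none~1|avg_pool_3x3~2|' | ||||
| ops = ['input'] + [t.split('~')[0] for t in arch.replace('+', '').split('|') if t] + ['output'] | ||||
| codes = [op_type[op] for op in ops]  # -> [7, 2, 4, 1, 6, 6, 3, 5] | ||||
| ``` | ||||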
|  | ||||
|  | ||||
| 20240629 | ||||
|  | ||||
| ## change: stop loading nasbench201 and read the data from the pt file instead | ||||
|  | ||||
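| A sketch of the intended change (the path is hypothetical; it is whatever process() wrote to processed_paths[0]): | ||||
|  | ||||
| ```python | ||||
| import torch | ||||
|  | ||||
| # load the already-processed tensors instead of rebuilding them through the NAS-Bench-201 API | ||||
| data, slices = torch.load('graph_dit/data/nasbench201/processed/data.pt') | ||||
| ``` | ||||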