adjust threshold for cifar100

add read swap csv codes
add a test performance script.
2024-08-29 10:37:42 +02:00 · 2024-08-29 09:25:15 +02:00 · 2024-08-26 20:12:47 +02:00 · 2024-08-25 16:10:23 +02:00 · 2024-08-21 10:40:00 +02:00
5 changed files with 47737 additions and 187682 deletions
--- a/graph_dit/datasets/dataset.py
+++ b/graph_dit/datasets/dataset.py
@@ -771,9 +771,10 @@ class Dataset(InMemoryDataset):
            edge_type = torch.tensor(edge_type, dtype=torch.long)
            edge_attr = edge_type
            # y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
-            y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
+            # y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
+            y = self.swap_scores[idx]
            print(y, idx)
-            if y > 1600:
+            if y > 60000:
                print(f'idx={idx}, y={y}')
                y = torch.tensor([1, 1], dtype=torch.float).view(1, -1)
                data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
@@ -812,6 +813,14 @@ class Dataset(InMemoryDataset):
        args.num_labels = 1
        searchspace = nasspace.get_search_space(args)
        train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
+        self.swap_scores = []
+        import csv
+        # with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results.csv', 'r') as f:
+        with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results_cifar100.csv', 'r') as f:
+            reader = csv.reader(f)
+            header = next(reader)
+            data = [row for row in reader]
+            self.swap_scores = [float(row[0]) for row in data]
        device = torch.device('cuda:2')
        with tqdm(total = len_data) as pbar:
            active_nodes = set()
@@ -823,14 +832,8 @@ class Dataset(InMemoryDataset):
            flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json'
            for graph in graph_list:
                print(f'iterate every graph in graph_list, here is {i}')
-                # arch_info = self.api.query_meta_info_by_index(i)
-                # results = self.api.query_by_index(i, 'cifar100')
                arch_info = graph['arch_str']
-                # results = 
-                # nodes, edges = parse_architecture_string(arch_info.arch_str)
-                # ops, adj_matrix = parse_architecture_string(arch_info.arch_str, padding=4)
                ops, adj_matrix, ori_nodes, ori_adj = parse_architecture_string(arch_info, padding=4)
-                # adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
                for op in ops:
                    if op not in active_nodes:
                        active_nodes.add(op)
@@ -839,12 +842,6 @@ class Dataset(InMemoryDataset):
                if data is None:
                    pbar.update(1)
                    continue
-                # with open(flex_graph_path, 'a') as f:
-                #     flex_graph = {
-                #         'adj_matrix': adj_matrix,
-                #         'ops': ops,
-                #     }
-                #     json.dump(flex_graph, f)
                flex_graph_list.append({
                    'adj_matrix':adj_matrix,
                    'ops': ops,
--- a/graph_dit/exp_201/barplog.png
+++ b/graph_dit/exp_201/barplog.png
--- a/graph_dit/exp_201/main.py
+++ b/graph_dit/exp_201/main.py
@@ -2,44 +2,45 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 from nas_201_api import NASBench201API as API
-from naswot.score_networks import get_nasbench201_idx_score
-from naswot import datasets as dt
-from naswot import nasspace
+# from naswot.score_networks import get_nasbench201_idx_score
+# from naswot import datasets as dt
+# from naswot import nasspace

-class Args():
-    pass
-args = Args()
-args.trainval = True
-args.augtype = 'none'
-args.repeat = 1
-args.score = 'hook_logdet'
-args.sigma = 0.05
-args.nasspace = 'nasbench201'
-args.batch_size = 128
-args.GPU = '0'
-args.dataset = 'cifar10'
-args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
-args.data_loc = '../cifardata/'
-args.seed = 777
-args.init = ''
-args.save_loc = 'results'
-args.save_string = 'naswot'
-args.dropout = False
-args.maxofn = 1
-args.n_samples = 100
-args.n_runs = 500
-args.stem_out_channels = 16
-args.num_stacks = 3
-args.num_modules_per_stack = 3
-args.num_labels = 1
-searchspace = nasspace.get_search_space(args)
-train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
-device = torch.device('cuda:2')
+# class Args():
+#     pass
+# args = Args()
+# args.trainval = True
+# args.augtype = 'none'
+# args.repeat = 1
+# args.score = 'hook_logdet'
+# args.sigma = 0.05
+# args.nasspace = 'nasbench201'
+# args.batch_size = 128
+# args.GPU = '0'
+# args.dataset = 'cifar10'
+# args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+# args.data_loc = '../cifardata/'
+# args.seed = 777
+# args.init = ''
+# args.save_loc = 'results'
+# args.save_string = 'naswot'
+# args.dropout = False
+# args.maxofn = 1
+# args.n_samples = 100
+# args.n_runs = 500
+# args.stem_out_channels = 16
+# args.num_stacks = 3
+# args.num_modules_per_stack = 3
+# args.num_labels = 1
+# searchspace = nasspace.get_search_space(args)
+# train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
+# device = torch.device('cuda:2')


+source = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+api = API(source)
+

-# source = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
-# api = API(source)



@@ -50,8 +51,10 @@ percentages = []
 len_201 = 15625

 for i in range(len_201):
-    percentage = get_nasbench201_idx_score(i, train_loader, searchspace, args, device)
-    percentages.append(percentage)
+    # percentage = get_nasbench201_idx_score(i, train_loader, searchspace, args, device)
+    results = api.query_by_index(i, 'cifar10')
+    result = results[111].get_eval('ori-test')
+    percentages.append(result)

 # 定义10%区间
 bins = [i for i in range(0, 101, 10)]
--- a/graph_dit/test_nasbench.ipynb
+++ b/graph_dit/test_nasbench.ipynb
--- a/graph_dit/test_perf.py
+++ b/graph_dit/test_perf.py
@@ -0,0 +1,144 @@
+from nas_201_api import NASBench201API as API
+import re
+import pandas as pd
+import json
+import numpy as np
+import argparse
+
+api = API('./NAS-Bench-201-v1_1-096897.pth')
+
+parser = argparse.ArgumentParser(description='Process some integers.')
+
+parser.add_argument('--file_path', type=str, default='211035.txt',)
+parser.add_argument('--datasets', type=str, default='cifar10',)
+args = parser.parse_args()
+
+def process_graph_data(text):
+    # Split the input text into sections for each graph
+    graph_sections = text.strip().split('nodes:')
+    
+    # Prepare lists to store data
+    nodes_list = []
+    edges_list = []
+    results_list = []
+    
+    for section in graph_sections[1:]:
+        # Extract nodes
+        nodes_section = section.split('edges:')[0]
+        nodes_match = re.search(r'(tensor\(\d+\) ?)+', section)
+        if nodes_match:
+            nodes = re.findall(r'tensor\((\d+)\)', nodes_match.group(0))
+            nodes_list.append(nodes)
+        
+        # Extract edges
+        edge_section = section.split('edges:')[1]
+        edges_match = re.search(r'edges:', section)
+        if edges_match:
+            edges = re.findall(r'tensor\((\d+)\)', edge_section)
+            edges_list.append(edges)
+        
+        # Extract the last floating point number as a result
+    
+    # Create a DataFrame to store the extracted data
+    data = {
+        'nodes': nodes_list,
+        'edges': edges_list,
+    }
+    data['nodes'] = [[int(x) for x in node] for node in data['nodes']]
+    data['edges'] = [[int(x) for x in edge] for edge in data['edges']]
+    def split_list(input_list, chunk_size):
+        return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
+    data['edges'] = [split_list(edge, 8) for edge in data['edges']]
+
+    print(data)
+    df = pd.DataFrame(data)
+    print('df')
+    print(df['nodes'][0], df['edges'][0])
+    return df
+
+def is_valid_nasbench201(adj, ops):
+    print(ops)
+    if ops[0] != 0 or ops[-1] != 6:
+        return False
+    for i in range(2, len(ops) - 1):
+        if ops[i] not in [1, 2, 3, 4, 5]:
+            return False
+    adj_mat = [ [0, 1, 1, 0, 1, 0, 0, 0],
+                [0, 0, 0, 1, 0, 1 ,0 ,0],
+                [0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 0, 0, 0, 1],
+                [0, 0, 0, 0, 0, 0, 0, 1],
+                [0, 0, 0, 0, 0, 0, 0, 1],
+                [0, 0, 0, 0, 0, 0, 0, 0]]
+ 
+    for i in range(len(adj)):
+        for j in range(len(adj[i])):
+            if adj[i][j] not in [0, 1]:
+                return False
+            if j > i:
+                if adj[i][j] != adj_mat[i][j]:
+                    return False
+    return True
+
+num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none', 'output']
+def nodes_to_arch_str(nodes):
+    nodes_str = [num_to_op[node] for node in nodes]
+    arch_str = '|' + nodes_str[1] + '~0|+' + \
+               '|' + nodes_str[2] + '~0|' + nodes_str[3] + '~1|+' +\
+               '|' + nodes_str[4] + '~0|' + nodes_str[5] + '~1|' + nodes_str[6] + '~2|' 
+    return arch_str
+
+filename = args.file_path
+datasets_name = args.datasets
+
+with open('./output_graphs/' + filename, 'r') as f:
+    texts = f.read()
+    df = process_graph_data(texts)
+    valid = 0
+    not_valid = 0
+    scores = []
+
+    # 定义分类标准和分布字典的映射
+    thresholds = {
+        'cifar10': [90, 91, 92, 93, 94],
+        'cifar100': [68,69,70, 71, 72, 73]
+    }
+    dist = {f'<{threshold}': 0 for threshold in thresholds[datasets_name]}
+    dist[f'>{thresholds[datasets_name][-1]}'] = 0
+
+    for i in range(len(df)):
+        nodes = df['nodes'][i]
+        edges = df['edges'][i]
+        result = is_valid_nasbench201(edges, nodes)
+        if result:
+            valid += 1
+            arch_str = nodes_to_arch_str(nodes)
+            index = api.query_index_by_arch(arch_str)
+            res = api.get_more_info(index, datasets_name, None, hp=200, is_random=False)
+            acc = res['test-accuracy']
+            scores.append((index, acc))
+
+            # 根据阈值更新分布
+            updated = False
+            for threshold in thresholds[datasets_name]:
+                if acc < threshold:
+                    dist[f'<{threshold}'] += 1
+                    updated = True
+                    break
+            if not updated:
+                dist[f'>{thresholds[datasets_name][-1]}'] += 1
+        else:
+            not_valid += 1
+
+    with open('./output_graphs/' + filename + '_' + datasets_name +'.json', 'w') as f:
+        json.dump(scores, f)
+
+    print(scores)
+    print(valid, not_valid)
+    print(dist)
+    print("mean: ", np.mean([x[1] for x in scores]))
+    print("max: ", np.max([x[1] for x in scores]))
+    print("min: ", np.min([x[1] for x in scores]))
+
+
Author	SHA1	Message	Date
mhz	d36e1d1077	adjust threshold for cifar100	2024-08-29 10:37:42 +02:00
mhz	82183d3df7	add read swap csv codes	2024-08-29 09:25:15 +02:00
mhz	c86db9b6ba	add a test performance script.	2024-08-26 20:12:47 +02:00
mhz	a0473008a1	find the mysterious 94.37	2024-08-25 16:10:23 +02:00
mhz	05ee34e355	update the script to use nasbench-201 api	2024-08-21 10:40:00 +02:00