update the taskmodel

2024-06-30 16:39:42 +02:00
parent 66fe70028e
commit 7274b3f606
1 changed files with 53 additions and 1 deletions
--- a/graph_dit/metrics/property_metric.py
+++ b/graph_dit/metrics/property_metric.py
@@ -15,6 +15,17 @@ from rdkit.Chem import AllChem
 from rdkit import DataStructs
 from rdkit.Chem import rdMolDescriptors
 rdBase.DisableLog('rdApp.error')
 import json
 op_type = {
    'nor_conv_1x1': 1,
    'nor_conv_3x3': 2,
    'avg_pool_3x3': 3,
    'skip_connect': 4,
    'output': 5,
    'none': 6,
    'input': 7
 }
 task_to_colname = {
    'hiv_b': 'HIV_active',
@@ -32,8 +43,10 @@ tasktype_name = {
    'O2': 'regression',
    'N2': 'regression',
    'CO2': 'regression',
    'nasbench201': 'regression',
 }
 class TaskModel():
    """Scores based on an ECFP classifier."""
    def __init__(self, model_path, task_name):
@@ -55,8 +68,47 @@ class TaskModel():
            perfermance = self.train()
            dump(self.model, model_path)
            print('Oracle peformance: ', perfermance)
    def train(self):
        def read_adj_ops_from_json(filename):
            with open(filename, 'r') as json_file:
                data = json.load(json_file)
            adj_ops_pairs = []
            for item in data:
                adj_matrix = np.array(item['adj_matrix'])
                ops = item['ops']
                acc = item['train'][0]['accuracy']
                adj_ops_pairs.append((adj_matrix, ops, acc))
            return adj_ops_pairs
        def feature_from_adj_and_ops(adj, ops):
            return np.concatenate([adj.flatten(), ops])
        filename = '/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json'
        graphs = read_adj_ops_from_json(filename)
        adjs = []
        opss = []
        accs = []
        features = []
        for graph in graphs:
            adj, ops, acc=graph
            op_code = [op_type[op] for op in ops]
            adjs.append(adj)
            opss.append(op_code)
            accs.append(acc)
            features.append(feature_from_adj_and_ops(adj, op_code))
        features = np.array(features)
        labels = np.array(accs)
        mask = ~np.isnan(labels)
        labels = labels[mask]
        features = features[mask]
        self.model.fit(features, labels)
        y_pred = self.model.predict(features)
        perf = self.metric_func(labels, y_pred)
        print(f'{self.task_name} performance: {perf}')
        return perf
    def train__(self):
        data_path = os.path.dirname(self.model_path)
        data_path = os.path.join(os.path.dirname(self.model_path), '..', f'raw/{self.task_name}.csv.gz')
        df = pd.read_csv(data_path)