Add per-thousand plot of swap scores over 60k

2024-08-31 15:50:12 +02:00
parent 968157b657
commit 4df5615380
1 changed file with 16 additions and 4 deletions
--- a/analyze.py
+++ b/analyze.py
@@ -4,16 +4,26 @@ from scipy import stats
 import pandas as pd
 import argparse

-def plot(l,filename):
+def plot(l, thousands, filename):
    lenth = len(l)
    threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]
    labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
-    l = [i/15625 for i in l]
+    l = [i/lenth for i in l]
    l = l[:7]
+    thousands = thousands[60:]
+    thousands_labels = [str(i) + 'k' for i in range(60, 70)]
+    plt.figure(figsize=(8, 6))
+    plt.subplots_adjust(top=0.85)
+    plt.title('Distribution of Swap Scores over 60k')
+    plt.bar(thousands_labels, thousands)
+    for i, v in enumerate(thousands):
+        plt.text(i, v + 0.01, str(v), ha='center', va='bottom')
+    plt.savefig(filename + '_60k.png') 
+
    datasets = filename.split('_')[-1].split('.')[0]
    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(top=0.85)
-    plt.ylim(0,0.3)
+    # plt.ylim(0,0.3)
    plt.title('Distribution of Swap Scores in ' + datasets)
    plt.bar(labels, l)
    for i, v in enumerate(l):
@@ -29,6 +39,7 @@ def analyse(filename):
        reader = csv.reader(file)
        header = next(reader)
        data = [row for row in reader]
+        thousands = [0 for i in range(70)]
        
        for row in data:
            score = row[0]
@@ -37,6 +48,7 @@ def analyse(filename):
            ind = float(score) // 10000
            ind = int(ind)
            l[ind] += 1
+            thousands[int(float(score) // 1000)] += 1
            acc = row[1]
            index = row[2]
            datas = list(zip(score, acc, index))
@@ -45,7 +57,7 @@ def analyse(filename):
    results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])
    print(results['swap_score'].max())
    print(best_value)
-    plot(l, filename + '.png')
+    plot(l, thousands, filename + '.png')
    return stats.spearmanr(results.swap_score, results.valid_acc)[0]

 if __name__ == '__main__':