update over 60k plots
This commit is contained in:
		
							
								
								
									
										20
									
								
								analyze.py
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								analyze.py
									
									
									
									
									
								
							| @@ -4,16 +4,26 @@ from scipy import stats | |||||||
| import pandas as pd | import pandas as pd | ||||||
| import argparse | import argparse | ||||||
|  |  | ||||||
| def plot(l,filename): | def plot(l, thousands, filename): | ||||||
|     lenth = len(l) |     lenth = len(l) | ||||||
|     threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000] |     threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000] | ||||||
|     labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k'] |     labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k'] | ||||||
|     l = [i/15625 for i in l] |     l = [i/lenth for i in l] | ||||||
|     l = l[:7] |     l = l[:7] | ||||||
|  |     thousands = thousands[60:] | ||||||
|  |     thousands_labels = [str(i) + 'k' for i in range(60, 70)] | ||||||
|  |     plt.figure(figsize=(8, 6)) | ||||||
|  |     plt.subplots_adjust(top=0.85) | ||||||
|  |     plt.title('Distribution of Swap Scores over 60k') | ||||||
|  |     plt.bar(thousands_labels, thousands) | ||||||
|  |     for i, v in enumerate(thousands): | ||||||
|  |         plt.text(i, v + 0.01, str(v), ha='center', va='bottom') | ||||||
|  |     plt.savefig(filename + '_60k.png')  | ||||||
|  |  | ||||||
|     datasets = filename.split('_')[-1].split('.')[0] |     datasets = filename.split('_')[-1].split('.')[0] | ||||||
|     plt.figure(figsize=(8, 6)) |     plt.figure(figsize=(8, 6)) | ||||||
|     plt.subplots_adjust(top=0.85) |     plt.subplots_adjust(top=0.85) | ||||||
|     plt.ylim(0,0.3) |     # plt.ylim(0,0.3) | ||||||
|     plt.title('Distribution of Swap Scores in ' + datasets) |     plt.title('Distribution of Swap Scores in ' + datasets) | ||||||
|     plt.bar(labels, l) |     plt.bar(labels, l) | ||||||
|     for i, v in enumerate(l): |     for i, v in enumerate(l): | ||||||
| @@ -29,6 +39,7 @@ def analyse(filename): | |||||||
|         reader = csv.reader(file) |         reader = csv.reader(file) | ||||||
|         header = next(reader) |         header = next(reader) | ||||||
|         data = [row for row in reader] |         data = [row for row in reader] | ||||||
|  |         thousands = [0 for i in range(70)] | ||||||
|          |          | ||||||
|         for row in data: |         for row in data: | ||||||
|             score = row[0] |             score = row[0] | ||||||
| @@ -37,6 +48,7 @@ def analyse(filename): | |||||||
|             ind = float(score) // 10000 |             ind = float(score) // 10000 | ||||||
|             ind = int(ind) |             ind = int(ind) | ||||||
|             l[ind] += 1 |             l[ind] += 1 | ||||||
|  |             thousands[int(float(score) // 1000)] += 1 | ||||||
|             acc = row[1] |             acc = row[1] | ||||||
|             index = row[2] |             index = row[2] | ||||||
|             datas = list(zip(score, acc, index)) |             datas = list(zip(score, acc, index)) | ||||||
| @@ -45,7 +57,7 @@ def analyse(filename): | |||||||
|     results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index']) |     results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index']) | ||||||
|     print(results['swap_score'].max()) |     print(results['swap_score'].max()) | ||||||
|     print(best_value) |     print(best_value) | ||||||
|     plot(l, filename + '.png') |     plot(l, thousands, filename + '.png') | ||||||
|     return stats.spearmanr(results.swap_score, results.valid_acc)[0] |     return stats.spearmanr(results.swap_score, results.valid_acc)[0] | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user