Update plots: add distribution of swap scores over 60k

This commit is contained in:
Mhrooz 2024-08-31 15:50:12 +02:00
parent 968157b657
commit 4df5615380

View File

@ -4,16 +4,26 @@ from scipy import stats
import pandas as pd import pandas as pd
import argparse import argparse
def plot(l,filename): def plot(l, thousands, filename):
lenth = len(l) lenth = len(l)
threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000] threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]
labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k'] labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
l = [i/15625 for i in l] l = [i/lenth for i in l]
l = l[:7] l = l[:7]
thousands = thousands[60:]
thousands_labels = [str(i) + 'k' for i in range(60, 70)]
plt.figure(figsize=(8, 6))
plt.subplots_adjust(top=0.85)
plt.title('Distribution of Swap Scores over 60k')
plt.bar(thousands_labels, thousands)
for i, v in enumerate(thousands):
plt.text(i, v + 0.01, str(v), ha='center', va='bottom')
plt.savefig(filename + '_60k.png')
datasets = filename.split('_')[-1].split('.')[0] datasets = filename.split('_')[-1].split('.')[0]
plt.figure(figsize=(8, 6)) plt.figure(figsize=(8, 6))
plt.subplots_adjust(top=0.85) plt.subplots_adjust(top=0.85)
plt.ylim(0,0.3) # plt.ylim(0,0.3)
plt.title('Distribution of Swap Scores in ' + datasets) plt.title('Distribution of Swap Scores in ' + datasets)
plt.bar(labels, l) plt.bar(labels, l)
for i, v in enumerate(l): for i, v in enumerate(l):
@ -29,6 +39,7 @@ def analyse(filename):
reader = csv.reader(file) reader = csv.reader(file)
header = next(reader) header = next(reader)
data = [row for row in reader] data = [row for row in reader]
thousands = [0 for i in range(70)]
for row in data: for row in data:
score = row[0] score = row[0]
@ -37,6 +48,7 @@ def analyse(filename):
ind = float(score) // 10000 ind = float(score) // 10000
ind = int(ind) ind = int(ind)
l[ind] += 1 l[ind] += 1
thousands[int(float(score) // 1000)] += 1
acc = row[1] acc = row[1]
index = row[2] index = row[2]
datas = list(zip(score, acc, index)) datas = list(zip(score, acc, index))
@ -45,7 +57,7 @@ def analyse(filename):
results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index']) results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])
print(results['swap_score'].max()) print(results['swap_score'].max())
print(best_value) print(best_value)
plot(l, filename + '.png') plot(l, thousands, filename + '.png')
return stats.spearmanr(results.swap_score, results.valid_acc)[0] return stats.spearmanr(results.swap_score, results.valid_acc)[0]
if __name__ == '__main__': if __name__ == '__main__':