update over 60k plots
This commit is contained in:
parent
968157b657
commit
4df5615380
20
analyze.py
20
analyze.py
@ -4,16 +4,26 @@ from scipy import stats
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
def plot(l,filename):
|
def plot(l, thousands, filename):
|
||||||
lenth = len(l)
|
lenth = len(l)
|
||||||
threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]
|
threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]
|
||||||
labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
|
labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
|
||||||
l = [i/15625 for i in l]
|
l = [i/lenth for i in l]
|
||||||
l = l[:7]
|
l = l[:7]
|
||||||
|
thousands = thousands[60:]
|
||||||
|
thousands_labels = [str(i) + 'k' for i in range(60, 70)]
|
||||||
|
plt.figure(figsize=(8, 6))
|
||||||
|
plt.subplots_adjust(top=0.85)
|
||||||
|
plt.title('Distribution of Swap Scores over 60k')
|
||||||
|
plt.bar(thousands_labels, thousands)
|
||||||
|
for i, v in enumerate(thousands):
|
||||||
|
plt.text(i, v + 0.01, str(v), ha='center', va='bottom')
|
||||||
|
plt.savefig(filename + '_60k.png')
|
||||||
|
|
||||||
datasets = filename.split('_')[-1].split('.')[0]
|
datasets = filename.split('_')[-1].split('.')[0]
|
||||||
plt.figure(figsize=(8, 6))
|
plt.figure(figsize=(8, 6))
|
||||||
plt.subplots_adjust(top=0.85)
|
plt.subplots_adjust(top=0.85)
|
||||||
plt.ylim(0,0.3)
|
# plt.ylim(0,0.3)
|
||||||
plt.title('Distribution of Swap Scores in ' + datasets)
|
plt.title('Distribution of Swap Scores in ' + datasets)
|
||||||
plt.bar(labels, l)
|
plt.bar(labels, l)
|
||||||
for i, v in enumerate(l):
|
for i, v in enumerate(l):
|
||||||
@ -29,6 +39,7 @@ def analyse(filename):
|
|||||||
reader = csv.reader(file)
|
reader = csv.reader(file)
|
||||||
header = next(reader)
|
header = next(reader)
|
||||||
data = [row for row in reader]
|
data = [row for row in reader]
|
||||||
|
thousands = [0 for i in range(70)]
|
||||||
|
|
||||||
for row in data:
|
for row in data:
|
||||||
score = row[0]
|
score = row[0]
|
||||||
@ -37,6 +48,7 @@ def analyse(filename):
|
|||||||
ind = float(score) // 10000
|
ind = float(score) // 10000
|
||||||
ind = int(ind)
|
ind = int(ind)
|
||||||
l[ind] += 1
|
l[ind] += 1
|
||||||
|
thousands[int(float(score) // 1000)] += 1
|
||||||
acc = row[1]
|
acc = row[1]
|
||||||
index = row[2]
|
index = row[2]
|
||||||
datas = list(zip(score, acc, index))
|
datas = list(zip(score, acc, index))
|
||||||
@ -45,7 +57,7 @@ def analyse(filename):
|
|||||||
results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])
|
results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])
|
||||||
print(results['swap_score'].max())
|
print(results['swap_score'].max())
|
||||||
print(best_value)
|
print(best_value)
|
||||||
plot(l, filename + '.png')
|
plot(l, thousands, filename + '.png')
|
||||||
return stats.spearmanr(results.swap_score, results.valid_acc)[0]
|
return stats.spearmanr(results.swap_score, results.valid_acc)[0]
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user