swap-nas/analyze.py

import csv
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import argparse

def plot(l, filename, total=15625):
    """Save a bar chart of the swap-score distribution to ``filename``.

    Args:
        l: Per-bin counts, one entry per 10k-wide score bin starting at 0.
            Only the first seven bins (0 to 70k) are drawn.
        filename: Output image path. The chart title uses the part of the
            name after the last ``_`` and before the first following ``.``.
        total: Value every count is divided by to turn it into a fraction.
            Defaults to 15625, the constant the script has always used
            (presumably the number of evaluated architectures; confirm).
    """
    labels = ['0-10k', '10k-20k', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
    fractions = [count / total for count in l][:len(labels)]
    # Keep labels and bars the same length so a short input cannot make
    # plt.bar fail with a shape mismatch.
    labels = labels[:len(fractions)]
    datasets = filename.split('_')[-1].split('.')[0]

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.subplots_adjust(top=0.85)
        plt.ylim(0, 0.3)
        plt.title('Distribution of Swap Scores in ' + datasets)
        plt.bar(labels, fractions)
        # Write each bar's value just above it.
        for i, v in enumerate(fractions):
            plt.text(i, v + 0.01, str(round(v, 2)), ha='center', va='bottom')
        plt.savefig(filename)
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)

def analyse(filename):
    """Summarise a swap-score CSV and return its rank correlation.

    The file is read as a header line followed by rows whose first three
    columns are the swap score, the validation accuracy and an index. The
    scores are counted into ten 10k-wide bins, the histogram is saved via
    ``plot`` to ``filename + '.png'``, and the best score is printed.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The Spearman rank correlation between swap score and validation
        accuracy.

    Raises:
        ValueError: If the file has no data rows, or a score/accuracy cell
            is not a number.
    """
    bins = [0] * 10
    scores = []
    records = []
    best_value = float('-inf')
    with open(filename, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header line; tolerate an empty file
        for row in reader:
            if not row:
                continue  # csv yields [] for blank lines
            # Convert once so every comparison below is numeric rather
            # than lexicographic on the raw strings.
            score = float(row[0])
            acc = float(row[1])
            best_value = max(best_value, score)
            # Clamp so unusually large or negative scores land in the
            # outermost bins instead of raising or wrapping around.
            ind = min(max(int(score // 10000), 0), len(bins) - 1)
            bins[ind] += 1
            scores.append(score)
            # One record per row; the frame must hold every row.
            records.append((score, acc, row[2]))
    if not records:
        raise ValueError('no data rows found in ' + filename)
    results = pd.DataFrame(records, columns=['swap_score', 'valid_acc', 'index'])
    # The three prints are kept so the script's output layout is unchanged.
    print(max(scores))
    print(results['swap_score'].max())
    print(best_value)
    plot(bins, filename + '.png')
    return stats.spearmanr(results.swap_score, results.valid_acc)[0]

if __name__ == '__main__':
    # Command-line entry point: analyse one CSV from the output directory
    # and print the value analyse() returns.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--filename',
        default='swap_results.csv',
        type=str,
        help='Filename to analyze',
    )
    options = cli.parse_args()

    csv_path = '/'.join(('output', options.filename))
    correlation = analyse(csv_path)
    print(correlation)
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00			`import csv`
			`import matplotlib.pyplot as plt`
			`from scipy import stats`
			`import pandas as pd`
add parser 2024-08-29 09:20:29 +02:00			`import argparse`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00
add parser 2024-08-29 09:20:29 +02:00			`def plot(l,filename):`
add format plot codes 2024-08-29 09:36:33 +02:00			`lenth = len(l)`
add parser 2024-08-29 09:20:29 +02:00			`threshold = [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000]`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00			`labels = ['0-10k', '10k-20k,', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']`
			`l = [i/15625 for i in l]`
			`l = l[:7]`
add format plot codes 2024-08-29 09:36:33 +02:00			`datasets = filename.split('_')[-1].split('.')[0]`
			`plt.figure(figsize=(8, 6))`
			`plt.subplots_adjust(top=0.85)`
			`plt.ylim(0,0.3)`
			`plt.title('Distribution of Swap Scores in ' + datasets)`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00			`plt.bar(labels, l)`
add format plot codes 2024-08-29 09:36:33 +02:00			`for i, v in enumerate(l):`
			`plt.text(i, v + 0.01, str(round(v, 2)), ha='center', va='bottom')`
add parser 2024-08-29 09:20:29 +02:00			`plt.savefig(filename)`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00
			`def analyse(filename):`
			`l = [0 for i in range(10)]`
			`scores = []`
			`count = 0`
			`best_value = -1`
			`with open(filename) as file:`
			`reader = csv.reader(file)`
			`header = next(reader)`
			`data = [row for row in reader]`

			`for row in data:`
			`score = row[0]`
			`best_value = max(best_value, float(score))`
			`# print(score)`
			`ind = float(score) // 10000`
			`ind = int(ind)`
			`l[ind] += 1`
			`acc = row[1]`
			`index = row[2]`
			`datas = list(zip(score, acc, index))`
			`scores.append(score)`
			`print(max(scores))`
			`results = pd.DataFrame(datas, columns=['swap_score', 'valid_acc', 'index'])`
			`print(results['swap_score'].max())`
			`print(best_value)`
add parser 2024-08-29 09:20:29 +02:00			`plot(l, filename + '.png')`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00			`return stats.spearmanr(results.swap_score, results.valid_acc)[0]`

			`if __name__ == '__main__':`
add parser 2024-08-29 09:20:29 +02:00			`parser = argparse.ArgumentParser()`
			`parser.add_argument('--filename', type=str, help='Filename to analyze', default='swap_results.csv')`

			`args = parser.parse_args()`

			`print(analyse('output' + '/' + args.filename))`
add a datsets option to specify the datset you want, add a plot script 2024-08-28 17:11:17 +02:00