"""Score generated graphs against the NAS-Bench-201 benchmark.

Reads a text dump of generated graphs from ``./output_graphs/<file_path>``,
keeps the graphs that form a valid NAS-Bench-201 cell, looks up each one's
CIFAR-10 test accuracy and writes the ``(index, accuracy)`` pairs to
``./output_graphs/<file_path>.json``.
"""
import re
import pandas as pd
import json
import numpy as np
import argparse

API_PATH = './NAS-Bench-201-v1_1-096897.pth'
OUTPUT_DIR = './output_graphs/'

# Operation name for each integer node label found in the dump.
num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3',
             'skip_connect', 'none', 'output']

# Labels allowed on the nodes between the input (0) and the output (6).
_INTERMEDIATE_OPS = (1, 2, 3, 4, 5)

# Upper-triangular connectivity every accepted graph must reproduce.
# NOTE(review): presumably the fixed NAS-Bench-201 cell wiring in the
# "operations on nodes" encoding -- confirm against the graph generator.
_EXPECTED_ADJ = [
    [0, 1, 1, 0, 1, 0, 0, 0],
    [0, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 0],
]

# First run of ``tensor(N)`` tokens separated by at most one space.
_TENSOR_RUN_RE = re.compile(r'(tensor\(\d+\) ?)+')
_TENSOR_VALUE_RE = re.compile(r'tensor\((\d+)\)')

# Upper bounds (exclusive) of the accuracy histogram buckets.
_ACCURACY_BOUNDS = (90, 91, 92, 93, 94)


def _chunk(values, size):
    """Split *values* into consecutive slices of at most *size* items."""
    return [values[i:i + size] for i in range(0, len(values), size)]


def process_graph_data(text):
    """Parse a graph dump into a DataFrame with ``nodes`` and ``edges``.

    The text is split on ``'nodes:'``; each resulting section must contain an
    ``'edges:'`` marker. Node labels come from the first run of ``tensor(N)``
    tokens before the marker, edge values from every ``tensor(N)`` token in
    the piece that follows it. Edge values are grouped into rows of 8.

    Sections without an ``'edges:'`` marker or without node tensors are
    skipped as a whole, so the two columns always stay aligned.

    Returns:
        A ``pandas.DataFrame`` whose ``nodes`` cells are lists of ints and
        whose ``edges`` cells are lists of int rows. It is empty (but has
        both columns) when nothing could be parsed.
    """
    nodes_list = []
    edges_list = []

    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # Malformed section: no edge list to pair with the nodes.
            continue
        nodes_part, edges_part = parts[0], parts[1]

        # Search only the node part so edge tensors are never taken as nodes.
        run = _TENSOR_RUN_RE.search(nodes_part)
        if run is None:
            continue

        nodes = [int(v) for v in _TENSOR_VALUE_RE.findall(run.group(0))]
        edges = [int(v) for v in _TENSOR_VALUE_RE.findall(edges_part)]
        nodes_list.append(nodes)
        edges_list.append(_chunk(edges, 8))

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }
    print(data)
    df = pd.DataFrame(data)
    print('df')
    if not df.empty:
        print(df['nodes'][0], df['edges'][0])
    return df


def is_valid_nasbench201(adj, ops):
    """Return True when (*adj*, *ops*) describes an accepted cell.

    Args:
        adj: adjacency matrix as a list of rows of 0/1 ints.
        ops: node labels, indices into ``num_to_op``.

    A graph is accepted when it has exactly as many nodes as the reference
    adjacency matrix, starts with the input label (0), ends with the output
    label (6), carries only labels 1-5 on every node in between, has only
    0/1 adjacency entries and matches the reference matrix above the
    diagonal. Entries on or below the diagonal are only checked for being
    0 or 1. Wrongly sized inputs are rejected instead of raising.
    """
    print(ops)
    size = len(_EXPECTED_ADJ)

    if len(ops) != size:
        return False
    if ops[0] != 0 or ops[-1] != 6:
        return False
    # Every intermediate node counts, including the first one after the
    # input; an input/output label there cannot be turned into a valid
    # architecture string.
    if any(op not in _INTERMEDIATE_OPS for op in ops[1:-1]):
        return False

    if len(adj) != size:
        return False
    for i, row in enumerate(adj):
        if len(row) != size:
            return False
        for j, value in enumerate(row):
            if value not in (0, 1):
                return False
            if j > i and value != _EXPECTED_ADJ[i][j]:
                return False
    return True


def nodes_to_arch_str(nodes):
    """Build the architecture string from node labels.

    Uses the labels at positions 1-6 of *nodes* and produces
    ``|a~0|+|b~0|c~1|+|d~0|e~1|f~2|``.
    """
    names = [num_to_op[node] for node in nodes]
    return (
        f'|{names[1]}~0|+'
        f'|{names[2]}~0|{names[3]}~1|+'
        f'|{names[4]}~0|{names[5]}~1|{names[6]}~2|'
    )


def _accuracy_bucket(acc):
    """Return the histogram key (``'<90'`` ... ``'>94'``) for *acc*."""
    for bound in _ACCURACY_BOUNDS:
        if acc < bound:
            return f'<{bound}'
    return '>94'


def main(argv=None):
    """Run the scoring script; *argv* defaults to ``sys.argv[1:]``."""
    # Imported here so the parsing/validation helpers above stay importable
    # on machines without the benchmark package installed.
    from nas_201_api import NASBench201API as API

    parser = argparse.ArgumentParser(
        description='Score generated graphs against NAS-Bench-201.')
    parser.add_argument('--file_path', type=str, default='211035.txt',)
    args = parser.parse_args(argv)

    api = API(API_PATH)

    filename = args.file_path
    with open(OUTPUT_DIR + filename, 'r') as f:
        texts = f.read()

    df = process_graph_data(texts)

    valid = 0
    not_valid = 0
    scores = []
    dist = {'<90': 0, '<91': 0, '<92': 0, '<93': 0, '<94': 0, '>94': 0}

    for nodes, edges in zip(df['nodes'], df['edges']):
        if not is_valid_nasbench201(edges, nodes):
            not_valid += 1
            continue

        index = api.query_index_by_arch(nodes_to_arch_str(nodes))
        if index < 0:
            # The benchmark reports unknown architectures as -1.
            not_valid += 1
            continue

        valid += 1
        res = api.get_more_info(index, 'cifar10', None, hp=200,
                                is_random=False)
        acc = res['test-accuracy']
        scores.append((index, acc))
        dist[_accuracy_bucket(acc)] += 1

    with open(OUTPUT_DIR + filename + '.json', 'w') as f:
        json.dump(scores, f)

    print(scores)
    print(valid, not_valid)
    print(dist)
    if scores:
        accuracies = [acc for _, acc in scores]
        print("mean: ", np.mean(accuracies))
        print("max: ", np.max(accuracies))
        print("min: ", np.min(accuracies))
    else:
        # np.max / np.min raise on an empty sequence.
        print("no valid architectures; accuracy statistics skipped")


if __name__ == '__main__':
    main()