2024-08-26 20:12:47 +02:00
|
|
|
from nas_201_api import NASBench201API as API
|
|
|
|
import re
|
|
|
|
import pandas as pd
|
|
|
|
import json
|
|
|
|
import numpy as np
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
# Command-line interface.  Arguments are parsed before the benchmark API is
# constructed so that --help and argument errors are reported immediately.
parser = argparse.ArgumentParser(
    description='Score generated NAS-Bench-201 graphs with the benchmark API.',
)
parser.add_argument(
    '--file_path',
    type=str,
    default='211035.txt',
    help='Name of the graph dump to read from ./output_graphs/.',
)
parser.add_argument(
    '--datasets',
    type=str,
    default='cifar10',
    # Only these datasets have accuracy thresholds defined further down in
    # this script; any other value used to fail later with a KeyError.
    choices=['cifar10', 'cifar100'],
    help='Dataset whose test accuracy is looked up for each architecture.',
)
args = parser.parse_args()

# Benchmark handle used by the scoring loop at the bottom of the script.
api = API('./NAS-Bench-201-v1_1-096897.pth')
|
|
|
|
|
|
|
|
def _split_list(input_list, chunk_size):
    """Return consecutive slices of *input_list*, each at most *chunk_size* long."""
    return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]


def process_graph_data(text):
    """Parse a text dump of graphs into a DataFrame of nodes and edges.

    The text is split on the literal marker ``nodes:``; anything before the
    first marker is ignored.  Inside each section, the part before the first
    ``edges:`` marker is scanned for ``tensor(<int>)`` tokens (the node
    values) and the part after it for the edge values, which are regrouped
    into rows of eight entries.

    Sections that have no ``edges:`` marker or no node token are skipped, so
    the two columns always have the same length.

    Args:
        text: Full content of the dump file.

    Returns:
        A ``pandas.DataFrame`` with a ``'nodes'`` column (list of ints per
        row) and an ``'edges'`` column (list of lists of ints per row).
    """
    token_pattern = re.compile(r'tensor\((\d+)\)')

    nodes_list = []
    edges_list = []

    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # No edge list in this section: nothing usable to record.
            continue

        # Look for node tokens only in front of the edge marker so that edge
        # values can never be mistaken for nodes.
        nodes = token_pattern.findall(parts[0])
        if not nodes:
            continue
        edges = token_pattern.findall(parts[1])

        nodes_list.append([int(value) for value in nodes])
        edges_list.append(_split_list([int(value) for value in edges], 8))

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }

    print(data)
    df = pd.DataFrame(data)
    print('df')
    if len(df):
        # Preview of the first parsed graph; skipped when nothing was parsed.
        print(df['nodes'][0], df['edges'][0])
    return df
|
|
|
|
|
|
|
|
# Reference connectivity that the upper triangle of every accepted adjacency
# matrix must reproduce.
_NB201_ADJACENCY = (
    (0, 1, 1, 0, 1, 0, 0, 0),
    (0, 0, 0, 1, 0, 1, 0, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 0),
)


def is_valid_nasbench201(adj, ops):
    """Check whether ``(adj, ops)`` matches the fixed 8-node cell layout.

    A graph is accepted when all of the following hold:

    * ``ops`` has eight entries, starts with 0 and ends with 6;
    * every entry in between is one of the codes 1..5;
    * ``adj`` is an 8x8 matrix containing only 0 and 1;
    * the strict upper triangle of ``adj`` equals the reference matrix.

    Entries on or below the diagonal are only required to be 0 or 1.

    Args:
        adj: Adjacency matrix as a sequence of rows.
        ops: Sequence of integer node codes.

    Returns:
        ``True`` if the graph is valid, ``False`` otherwise.
    """
    print(ops)

    size = len(_NB201_ADJACENCY)
    # Reject wrongly sized inputs up front: an empty ``ops`` used to raise
    # IndexError and a truncated ``adj`` used to pass unchecked.
    if len(ops) != size or len(adj) != size:
        return False

    if ops[0] != 0 or ops[-1] != 6:
        return False

    # Every node between the first and the last must carry an operation code,
    # including the one at position 1.
    if any(op not in (1, 2, 3, 4, 5) for op in ops[1:-1]):
        return False

    for i, row in enumerate(adj):
        if len(row) != size:
            return False
        for j, entry in enumerate(row):
            if entry not in (0, 1):
                return False
            if j > i and entry != _NB201_ADJACENCY[i][j]:
                return False
    return True
|
|
|
|
|
|
|
|
# Operation name for each integer node code; the list index is the code.
num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none', 'output']


def nodes_to_arch_str(nodes):
    """Build the architecture string for a sequence of node codes.

    Each code is translated through ``num_to_op``.  The names at positions
    1..6 are placed into three ``+``-separated groups, each name tagged with
    ``~<k>`` where ``k`` is its position inside the group.

    Args:
        nodes: Sequence of integer codes indexing ``num_to_op``; entries 1
            through 6 are used.

    Returns:
        The assembled architecture string.

    Raises:
        IndexError: If ``nodes`` has fewer than seven entries or contains a
            code outside ``num_to_op``.
    """
    names = [num_to_op[node] for node in nodes]
    if len(names) < 7:
        raise IndexError(
            f'expected at least 7 node codes, got {len(names)}'
        )
    return (
        f'|{names[1]}~0|+'
        f'|{names[2]}~0|{names[3]}~1|+'
        f'|{names[4]}~0|{names[5]}~1|{names[6]}~2|'
    )
|
|
|
|
|
|
|
|
filename = args.file_path
datasets_name = args.datasets

with open('./output_graphs/' + filename, 'r') as f:
    texts = f.read()
df = process_graph_data(texts)

valid = 0
not_valid = 0
scores = []

# Accuracy bucket boundaries per dataset; the histogram below has one
# '<boundary' bucket per entry plus a final '>last' bucket.
thresholds = {
    'cifar10': [90, 91, 92, 93, 94],
    'cifar100': [68, 69, 70, 71, 72, 73],
}
bounds = thresholds[datasets_name]
overflow_key = f'>{bounds[-1]}'
dist = {f'<{threshold}': 0 for threshold in bounds}
dist[overflow_key] = 0

for nodes, edges in zip(df['nodes'], df['edges']):
    if not is_valid_nasbench201(edges, nodes):
        not_valid += 1
        continue

    valid += 1
    arch_str = nodes_to_arch_str(nodes)
    index = api.query_index_by_arch(arch_str)
    res = api.get_more_info(index, datasets_name, None, hp=200, is_random=False)
    acc = res['test-accuracy']
    scores.append((index, acc))

    # Update the distribution: count the accuracy in the first bucket whose
    # boundary it is below, or in the overflow bucket if it is below none.
    bucket = next((f'<{threshold}' for threshold in bounds if acc < threshold), overflow_key)
    dist[bucket] += 1

with open('./output_graphs/' + filename + '_' + datasets_name + '.json', 'w') as f:
    json.dump(scores, f)

print(scores)
print(valid, not_valid)
print(dist)
if scores:
    accuracies = [acc for _, acc in scores]
    print("mean: ", np.mean(accuracies))
    print("max: ", np.max(accuracies))
    print("min: ", np.min(accuracies))
else:
    # np.max / np.min raise ValueError on an empty sequence, so the summary
    # is skipped when no graph passed validation.
    print("no valid architectures; skipping accuracy statistics")
|
2024-08-29 10:37:42 +02:00
|
|
|
|
2024-08-26 20:12:47 +02:00
|
|
|
|