Compare commits

...

2 Commits

Author SHA1 Message Date
mhz
82183d3df7 add read swap csv codes 2024-08-29 09:25:15 +02:00
mhz
c86db9b6ba add a test performance script. 2024-08-26 20:12:47 +02:00
2 changed files with 147 additions and 14 deletions

View File

@ -771,9 +771,10 @@ class Dataset(InMemoryDataset):
edge_type = torch.tensor(edge_type, dtype=torch.long)
edge_attr = edge_type
# y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
# y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
y = self.swap_scores[idx]
print(y, idx)
if y > 1600:
if y > 60000:
print(f'idx={idx}, y={y}')
y = torch.tensor([1, 1], dtype=torch.float).view(1, -1)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
@ -812,6 +813,14 @@ class Dataset(InMemoryDataset):
args.num_labels = 1
searchspace = nasspace.get_search_space(args)
train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
self.swap_scores = []
import csv
# with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results.csv', 'r') as f:
with open('/nfs/data3/hanzhang/nasbenchDiT/graph_dit/swap_results_cifar100.csv', 'r') as f:
reader = csv.reader(f)
header = next(reader)
data = [row for row in reader]
self.swap_scores = [float(row[0]) for row in data]
device = torch.device('cuda:2')
with tqdm(total = len_data) as pbar:
active_nodes = set()
@ -823,14 +832,8 @@ class Dataset(InMemoryDataset):
flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json'
for graph in graph_list:
print(f'iterate every graph in graph_list, here is {i}')
# arch_info = self.api.query_meta_info_by_index(i)
# results = self.api.query_by_index(i, 'cifar100')
arch_info = graph['arch_str']
# results =
# nodes, edges = parse_architecture_string(arch_info.arch_str)
# ops, adj_matrix = parse_architecture_string(arch_info.arch_str, padding=4)
ops, adj_matrix, ori_nodes, ori_adj = parse_architecture_string(arch_info, padding=4)
# adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
for op in ops:
if op not in active_nodes:
active_nodes.add(op)
@ -839,12 +842,6 @@ class Dataset(InMemoryDataset):
if data is None:
pbar.update(1)
continue
# with open(flex_graph_path, 'a') as f:
# flex_graph = {
# 'adj_matrix': adj_matrix,
# 'ops': ops,
# }
# json.dump(flex_graph, f)
flex_graph_list.append({
'adj_matrix':adj_matrix,
'ops': ops,

136
graph_dit/test_perf.py Normal file
View File

@ -0,0 +1,136 @@
from nas_201_api import NASBench201API as API
import re
import pandas as pd
import json
import numpy as np
import argparse
# Parse the command line first so that `--help` and argument errors are
# reported before the benchmark file is opened.
parser = argparse.ArgumentParser(
    description='Look up NAS-Bench-201 test accuracy for graphs dumped in ./output_graphs/.')
parser.add_argument(
    '--file_path', type=str, default='211035.txt',
    help='file name (inside ./output_graphs/) of the graph dump to score')
args = parser.parse_args()

# Benchmark handle queried by the scoring loop at the bottom of this script.
api = API('./NAS-Bench-201-v1_1-096897.pth')
def process_graph_data(text, row_width=8):
    """Parse a text dump of graphs into a DataFrame of node and edge lists.

    The dump is split on the literal ``nodes:``.  Within each piece, the text
    before ``edges:`` supplies the node values and the text after it supplies
    the edge values; every value is written as ``tensor(<int>)``.

    Args:
        text: Full contents of the dump.
        row_width: Number of consecutive edge values grouped into one row of
            the ``edges`` entry.  Must be positive.  Defaults to 8.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed graph and two columns:
        ``nodes`` (list of ints) and ``edges`` (list of rows, each a list of
        at most ``row_width`` ints).  A piece without an ``edges:`` marker or
        without any node value is skipped, so both columns always have the
        same length.
    """
    tensor_value = re.compile(r'tensor\((\d+)\)')
    # A run of tensor(...) tokens separated by at most one space.
    tensor_run = re.compile(r'(tensor\(\d+\) ?)+')

    nodes_list = []
    edges_list = []
    # Whatever precedes the first 'nodes:' marker does not describe a graph.
    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # No edge list for this graph: skip it instead of raising.
            continue
        nodes_match = tensor_run.search(parts[0])
        if nodes_match is None:
            # Skip as well, otherwise the two columns would go out of step.
            continue
        nodes = [int(v) for v in tensor_value.findall(nodes_match.group(0))]
        flat_edges = [int(v) for v in tensor_value.findall(parts[1])]
        nodes_list.append(nodes)
        edges_list.append([
            flat_edges[start:start + row_width]
            for start in range(0, len(flat_edges), row_width)
        ])

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }
    print(data)
    df = pd.DataFrame(data)
    print('df')
    # Only show the first graph when there is one.
    if not df.empty:
        print(df['nodes'][0], df['edges'][0])
    return df
# Reference connectivity for the 8-node graph encoding checked below.  Only
# the entries above the diagonal are compared against a candidate graph.
_NASBENCH201_ADJ = (
    (0, 1, 1, 0, 1, 0, 0, 0),
    (0, 0, 0, 1, 0, 1, 0, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 0),
)


def is_valid_nasbench201(adj, ops):
    """Return True when ``(adj, ops)`` matches the fixed 8-node cell layout.

    Args:
        adj: Adjacency matrix as a sequence of rows of 0/1 values.
        ops: Operator code for each node.

    Returns:
        ``True`` only if all of the following hold:

        * ``ops`` has 8 entries and ``adj`` is 8x8;
        * the first operator is 0 and the last is 6;
        * every operator in between is one of 1-5;
        * every adjacency entry is 0 or 1;
        * every entry above the diagonal equals the reference matrix.

        Entries on or below the diagonal are only required to be 0 or 1.
    """
    print(ops)
    size = len(_NASBENCH201_ADJ)
    # Reject empty or wrongly sized graphs up front instead of raising or
    # comparing only part of the matrix.
    if len(ops) != size or len(adj) != size:
        return False
    if ops[0] != 0 or ops[-1] != 6:
        return False
    # Every interior node, including node 1, must carry a real operator.
    if any(op not in (1, 2, 3, 4, 5) for op in ops[1:-1]):
        return False
    for i, row in enumerate(adj):
        if len(row) != size:
            return False
        for j, value in enumerate(row):
            if value not in (0, 1):
                return False
            if j > i and value != _NASBENCH201_ADJ[i][j]:
                return False
    return True
# Operator name for each integer node code; the list index is the code.
num_to_op = [
    'input',
    'nor_conv_1x1',
    'nor_conv_3x3',
    'avg_pool_3x3',
    'skip_connect',
    'none',
    'output',
]


def nodes_to_arch_str(nodes):
    """Build the ``|op~src|...`` architecture string from node codes.

    Every code in ``nodes`` is translated through ``num_to_op``; the names at
    positions 1 through 6 are then arranged into three ``+``-separated groups
    of one, two and three ``|name~k|`` entries, where ``k`` counts up from 0
    inside each group.
    """
    names = [num_to_op[code] for code in nodes]
    groups = (
        (names[1],),
        (names[2], names[3]),
        (names[4], names[5], names[6]),
    )
    rendered = []
    for group in groups:
        entries = [f'{name}~{src}' for src, name in enumerate(group)]
        rendered.append('|' + '|'.join(entries) + '|')
    return '+'.join(rendered)
filename = args.file_path
# Read the graph dump named on the command line and parse it into a table.
with open('./output_graphs/' + filename, 'r') as f:
    texts = f.read()
df = process_graph_data(texts)

valid = 0
not_valid = 0
scores = []  # (benchmark index, test accuracy) for every valid graph
dist = {'<90': 0, '<91': 0, '<92': 0, '<93': 0, '<94': 0, '>94': 0}
for nodes, edges in zip(df['nodes'], df['edges']):
    if not is_valid_nasbench201(edges, nodes):
        not_valid += 1
        continue
    valid += 1
    arch_str = nodes_to_arch_str(nodes)
    index = api.query_index_by_arch(arch_str)
    res = api.get_more_info(index, 'cifar10', None, hp=200, is_random=False)
    acc = res['test-accuracy']
    scores.append((index, acc))
    # Each branch is reached only when the previous bound failed, so no
    # explicit lower bound is needed.
    if acc < 90:
        dist['<90'] += 1
    elif acc < 91:
        dist['<91'] += 1
    elif acc < 92:
        dist['<92'] += 1
    elif acc < 93:
        dist['<93'] += 1
    elif acc < 94:
        dist['<94'] += 1
    else:
        dist['>94'] += 1

with open('./output_graphs/' + filename + '.json', 'w') as f:
    json.dump(scores, f)

print(scores)
print(valid, not_valid)
print(dist)
accuracies = [acc for _, acc in scores]
if accuracies:
    print("mean: ", np.mean(accuracies))
    print("max: ", np.max(accuracies))
    print("min: ", np.min(accuracies))
else:
    # np.max / np.min raise ValueError on an empty sequence.
    print("no valid architectures; mean/max/min not available")