Compare commits

...

4 Commits

Author SHA1 Message Date
mhz
6d9db64a48 explore the 201 space script 2024-08-21 10:26:02 +02:00
mhz
3950a8438d set batch_y to 1 and want to test 15625 2024-08-20 22:15:25 +02:00
mhz
1fa2d49c11 set y's points 2024-08-20 21:57:47 +02:00
mhz
3c92e754d3 update the nasbench data 2024-08-20 09:24:39 +02:00
5 changed files with 150 additions and 29 deletions

View File

@ -32,7 +32,7 @@ model:
ensure_connected: True ensure_connected: True
train: train:
# n_epochs: 5000 # n_epochs: 5000
n_epochs: 10 n_epochs: 500
batch_size: 1200 batch_size: 1200
lr: 0.0002 lr: 0.0002
clip_grad: null clip_grad: null

View File

@ -25,7 +25,9 @@ from sklearn.model_selection import train_test_split
import utils as utils import utils as utils
from datasets.abstract_dataset import AbstractDatasetInfos, AbstractDataModule from datasets.abstract_dataset import AbstractDatasetInfos, AbstractDataModule
from diffusion.distributions import DistributionNodes from diffusion.distributions import DistributionNodes
# from naswot.score_networks import get_nasbench201_idx_score from naswot.score_networks import get_nasbench201_idx_score
from naswot import nasspace
from naswot import datasets as dt
import networkx as nx import networkx as nx
@ -682,7 +684,7 @@ class Dataset(InMemoryDataset):
data_list = [] data_list = []
# len_data = len(self.api) # len_data = len(self.api)
len_data = 1000 len_data = 15625
def check_valid_graph(nodes, edges): def check_valid_graph(nodes, edges):
if len(nodes) != edges.shape[0] or len(nodes) != edges.shape[1]: if len(nodes) != edges.shape[0] or len(nodes) != edges.shape[1]:
return False return False
@ -745,11 +747,9 @@ class Dataset(InMemoryDataset):
print(f'edges size: {edges.shape}, nodes size: {len(nodes)}') print(f'edges size: {edges.shape}, nodes size: {len(nodes)}')
return edges,nodes return edges,nodes
def get_nasbench_201_val(idx):
pass
# def graph_to_graph_data(graph, idx): def graph_to_graph_data(graph, idx, train_loader, searchspace, args, device):
def graph_to_graph_data(graph): # def graph_to_graph_data(graph):
ops = graph[1] ops = graph[1]
adj = graph[0] adj = graph[0]
nodes = [] nodes = []
@ -770,12 +770,49 @@ class Dataset(InMemoryDataset):
edge_index = torch.tensor(edges_list, dtype=torch.long).t() edge_index = torch.tensor(edges_list, dtype=torch.long).t()
edge_type = torch.tensor(edge_type, dtype=torch.long) edge_type = torch.tensor(edge_type, dtype=torch.long)
edge_attr = edge_type edge_attr = edge_type
y = torch.tensor([0, 0], dtype=torch.float).view(1, -1) # y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
# y = get_nasbench_201_val(idx) y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i) print(y, idx)
if y > 1600:
print(f'idx={idx}, y={y}')
y = torch.tensor([1, 1], dtype=torch.float).view(1, -1)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
else:
print(f'idx={idx}, y={y}')
y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
# return None
return data return data
graph_list = [] graph_list = []
class Args:
pass
args = Args()
args.trainval = True
args.augtype = 'none'
args.repeat = 1
args.score = 'hook_logdet'
args.sigma = 0.05
args.nasspace = 'nasbench201'
args.batch_size = 128
args.GPU = '0'
args.dataset = 'cifar10'
args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
args.data_loc = '../cifardata/'
args.seed = 777
args.init = ''
args.save_loc = 'results'
args.save_string = 'naswot'
args.dropout = False
args.maxofn = 1
args.n_samples = 100
args.n_runs = 500
args.stem_out_channels = 16
args.num_stacks = 3
args.num_modules_per_stack = 3
args.num_labels = 1
searchspace = nasspace.get_search_space(args)
train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
device = torch.device('cuda:2')
with tqdm(total = len_data) as pbar: with tqdm(total = len_data) as pbar:
active_nodes = set() active_nodes = set()
file_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json' file_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json'
@ -785,6 +822,7 @@ class Dataset(InMemoryDataset):
flex_graph_list = [] flex_graph_list = []
flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json' flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json'
for graph in graph_list: for graph in graph_list:
print(f'iterate every graph in graph_list, here is {i}')
# arch_info = self.api.query_meta_info_by_index(i) # arch_info = self.api.query_meta_info_by_index(i)
# results = self.api.query_by_index(i, 'cifar100') # results = self.api.query_by_index(i, 'cifar100')
arch_info = graph['arch_str'] arch_info = graph['arch_str']
@ -796,8 +834,11 @@ class Dataset(InMemoryDataset):
for op in ops: for op in ops:
if op not in active_nodes: if op not in active_nodes:
active_nodes.add(op) active_nodes.add(op)
data = graph_to_graph_data((adj_matrix, ops),idx=i, train_loader=train_loader, searchspace=searchspace, args=args, device=device)
data = graph_to_graph_data((adj_matrix, ops)) i += 1
if data is None:
pbar.update(1)
continue
# with open(flex_graph_path, 'a') as f: # with open(flex_graph_path, 'a') as f:
# flex_graph = { # flex_graph = {
# 'adj_matrix': adj_matrix, # 'adj_matrix': adj_matrix,
@ -816,18 +857,12 @@ class Dataset(InMemoryDataset):
f.write(str(data.edge_attr)) f.write(str(data.edge_attr))
data_list.append(data) data_list.append(data)
new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9, random_ratio=0.5) # new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9, random_ratio=0.5)
flex_graph_list.append({ # flex_graph_list.append({
'adj_matrix':new_adj.tolist(), # 'adj_matrix':new_adj.tolist(),
'ops': new_ops, # 'ops': new_ops,
}) # })
# with open(flex_graph_path, 'w') as f: # data_list.append(graph_to_graph_data((new_adj, new_ops)))
# flex_graph = {
# 'adj_matrix': new_adj.tolist(),
# 'ops': new_ops,
# }
# json.dump(flex_graph, f)
data_list.append(graph_to_graph_data((new_adj, new_ops)))
# graph_list.append({ # graph_list.append({
# "adj_matrix": adj_matrix, # "adj_matrix": adj_matrix,
@ -859,6 +894,7 @@ class Dataset(InMemoryDataset):
# "seed": seed, # "seed": seed,
# }for seed, result in results.items()] # }for seed, result in results.items()]
# }) # })
# i += 1
pbar.update(1) pbar.update(1)
for graph in graph_list: for graph in graph_list:
@ -872,8 +908,8 @@ class Dataset(InMemoryDataset):
graph['ops'] = ops graph['ops'] = ops
with open(f'nasbench-201-graph.json', 'w') as f: with open(f'nasbench-201-graph.json', 'w') as f:
json.dump(graph_list, f) json.dump(graph_list, f)
with open(flex_graph_path, 'w') as f: # with open(flex_graph_path, 'w') as f:
json.dump(flex_graph_list, f) # json.dump(flex_graph_list, f)
torch.save(self.collate(data_list), self.processed_paths[0]) torch.save(self.collate(data_list), self.processed_paths[0])
@ -1148,7 +1184,8 @@ class DataInfos(AbstractDatasetInfos):
# ops_type[op] = len(ops_type) # ops_type[op] = len(ops_type)
# len_ops.add(len(ops)) # len_ops.add(len(ops))
# graphs.append((adj_matrix, ops)) # graphs.append((adj_matrix, ops))
graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json') # graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json')
graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json')
# check first five graphs # check first five graphs
for i in range(5): for i in range(5):

View File

@ -356,7 +356,8 @@ class Graph_DiT(pl.LightningModule):
to_generate = min(samples_left_to_generate, bs) to_generate = min(samples_left_to_generate, bs)
to_save = min(samples_left_to_save, bs) to_save = min(samples_left_to_save, bs)
chains_save = min(chains_left_to_save, bs) chains_save = min(chains_left_to_save, bs)
batch_y = test_y_collection[batch_id : batch_id + to_generate] # batch_y = test_y_collection[batch_id : batch_id + to_generate]
batch_y = torch.ones(to_generate, self.ydim_output, device=self.device)
cur_sample = self.sample_batch(batch_id, to_generate, batch_y, save_final=to_save, cur_sample = self.sample_batch(batch_id, to_generate, batch_y, save_final=to_save,
keep_chain=chains_save, number_chain_steps=self.number_chain_steps) keep_chain=chains_save, number_chain_steps=self.number_chain_steps)

82
graph_dit/exp_201/main.py Normal file
View File

@ -0,0 +1,82 @@
"""Plot how the scores of all NAS-Bench-201 architectures are distributed.

Every architecture index in ``range(len_201)`` is scored with
``get_nasbench201_idx_score``; the scores are binned into 10-wide intervals
over [0, 100) and drawn as a bar chart whose bars are annotated with the
share of all architectures that fell into each interval. The figure is
written to ``barplog.png`` in the current working directory.
"""
import matplotlib.pyplot as plt
import pandas as pd
import torch  # required by torch.device below; was missing and raised NameError
from nas_201_api import NASBench201API as API
from naswot.score_networks import get_nasbench201_idx_score
from naswot import datasets as dt
from naswot import nasspace


class Args():
    """Bare attribute container standing in for parsed command-line options."""
    pass


# Settings handed to the naswot helpers below (search space, data loader, scorer).
args = Args()
args.trainval = True
args.augtype = 'none'
args.repeat = 1
args.score = 'hook_logdet'
args.sigma = 0.05
args.nasspace = 'nasbench201'
args.batch_size = 128
args.GPU = '0'
args.dataset = 'cifar10'
args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
args.data_loc = '../cifardata/'
args.seed = 777
args.init = ''
args.save_loc = 'results'
args.save_string = 'naswot'
args.dropout = False
args.maxofn = 1
args.n_samples = 100
args.n_runs = 500
args.stem_out_channels = 16
args.num_stacks = 3
args.num_modules_per_stack = 3
args.num_labels = 1

searchspace = nasspace.get_search_space(args)
train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
# NOTE(review): the device index is hard-coded; this fails on hosts with fewer than three GPUs.
device = torch.device('cuda:2')

# source = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
# api = API(source)

# Example list of percentages, accurate to two decimal places:
# percentages = [5.12, 15.78, 25.43, 35.22, 45.99, 55.34, 65.12, 75.68, 85.99, 95.25, 23.45, 12.34, 37.89, 58.67, 64.23, 72.15, 81.76, 99.99, 42.11, 61.58, 77.34, 14.56]

# Score every architecture index in the search space.
len_201 = 15625
percentages = [
    get_nasbench201_idx_score(i, train_loader, searchspace, args, device)
    for i in range(len_201)
]

# Define the 10%-wide intervals: edges 0, 10, ..., 100.
bins = list(range(0, 101, 10))

# Bin the data and count how many values fall into each interval.
# NOTE(review): pd.cut leaves values outside [0, 100) unassigned, so they are
# missing from the counts below while still being part of total_counts.
# Confirm that get_nasbench201_idx_score really returns values in that range.
hist, bin_edges = pd.cut(percentages, bins=bins, right=False, retbins=True, include_lowest=True)
bin_counts = hist.value_counts().sort_index()
total_counts = len(percentages)
percentages_in_bins = (bin_counts / total_counts) * 100

# Draw the bar chart.
plt.figure(figsize=(10, 6))
bars = plt.bar(bin_counts.index.astype(str), bin_counts.values, width=0.9, color='skyblue')

# Label each bar with the share of all values that landed in its interval.
for bar, percentage in zip(bars, percentages_in_bins):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
             f'{percentage:.2f}%', ha='center', va='bottom')

# Add the title and axis labels.
plt.title('Distribution of Percentages in 10% Intervals')
plt.xlabel('Percentage Interval')
plt.ylabel('Count')

# Rotate the tick labels and write the chart to disk (it is saved, not shown).
plt.xticks(rotation=45)
plt.savefig('barplog.png')

View File

@ -0,0 +1 @@
{"source": "nasbench-201", "num_graph": 15625, "n_nodes_per_graph": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "max_n_nodes": 8, "max_n_edges": 8, "node_type_list": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "edge_type_list": [0.6666666666666666, 0.3333333333333333], "valencies": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "active_nodes": ["*", "input", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3", "skip_connect", "none"], "num_active_nodes": 7, "transition_E": [[[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], 
[0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]]}