explore the 201 space script

set batch_y to 1 and want to test 15625
set y's points
2024-08-21 10:26:02 +02:00 · 2024-08-20 22:15:25 +02:00 · 2024-08-20 21:57:47 +02:00 · 2024-08-20 09:24:39 +02:00
5 changed files with 150 additions and 29 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -32,7 +32,7 @@ model:
    ensure_connected: True
 train:
    # n_epochs: 5000
-    n_epochs: 10
+    n_epochs: 500
    batch_size: 1200
    lr: 0.0002
    clip_grad: null
--- a/graph_dit/datasets/dataset.py
+++ b/graph_dit/datasets/dataset.py
@ -25,7 +25,9 @@ from sklearn.model_selection import train_test_split
 import utils as utils
 from datasets.abstract_dataset import AbstractDatasetInfos, AbstractDataModule
 from diffusion.distributions import DistributionNodes
-# from naswot.score_networks import get_nasbench201_idx_score
+from naswot.score_networks import get_nasbench201_idx_score
 from naswot import nasspace
 from naswot import datasets as dt
 import networkx as nx
@ -682,7 +684,7 @@ class Dataset(InMemoryDataset):
        data_list = []
        # len_data = len(self.api)
-        len_data = 1000
+        len_data = 15625
        def check_valid_graph(nodes, edges):
            if len(nodes) != edges.shape[0] or len(nodes) != edges.shape[1]:
                return False
@ -745,11 +747,9 @@ class Dataset(InMemoryDataset):
            print(f'edges size: {edges.shape}, nodes size: {len(nodes)}')
            return  edges,nodes
        def get_nasbench_201_val(idx):
            pass
-        # def graph_to_graph_data(graph, idx):
+        def graph_to_graph_data(graph, idx, train_loader, searchspace, args, device):
-        def graph_to_graph_data(graph):
+        # def graph_to_graph_data(graph):
            ops = graph[1]
            adj = graph[0]
            nodes = []
@ -770,12 +770,49 @@ class Dataset(InMemoryDataset):
            edge_index = torch.tensor(edges_list, dtype=torch.long).t()
            edge_type = torch.tensor(edge_type, dtype=torch.long)
            edge_attr = edge_type
-            y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
+            # y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
-            # y = get_nasbench_201_val(idx)
+            y = get_nasbench201_idx_score(idx, train_loader, searchspace, args, device)
-            data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
+            print(y, idx)
            if y > 1600:
                print(f'idx={idx}, y={y}')
                y = torch.tensor([1, 1], dtype=torch.float).view(1, -1)
                data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
            else:
                print(f'idx={idx}, y={y}')
                y = torch.tensor([0, 0], dtype=torch.float).view(1, -1)
                data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
                # return None
            return data
        graph_list = []
-
+        class Args:
            pass
        args = Args()
        args.trainval = True
        args.augtype = 'none'
        args.repeat = 1
        args.score = 'hook_logdet'
        args.sigma = 0.05
        args.nasspace = 'nasbench201'
        args.batch_size = 128
        args.GPU = '0'
        args.dataset = 'cifar10'
        args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
        args.data_loc = '../cifardata/'
        args.seed = 777
        args.init = ''
        args.save_loc = 'results'
        args.save_string = 'naswot'
        args.dropout = False
        args.maxofn = 1
        args.n_samples = 100
        args.n_runs = 500
        args.stem_out_channels = 16
        args.num_stacks = 3
        args.num_modules_per_stack = 3
        args.num_labels = 1
        searchspace = nasspace.get_search_space(args)
        train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
        device = torch.device('cuda:2')
        with tqdm(total = len_data) as pbar:
            active_nodes = set()
            file_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json'
@ -785,6 +822,7 @@ class Dataset(InMemoryDataset):
            flex_graph_list = []
            flex_graph_path = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json'
            for graph in graph_list:
                print(f'iterate every graph in graph_list, here is {i}')
                # arch_info = self.api.query_meta_info_by_index(i)
                # results = self.api.query_by_index(i, 'cifar100')
                arch_info = graph['arch_str']
@ -796,8 +834,11 @@ class Dataset(InMemoryDataset):
                for op in ops:
                    if op not in active_nodes:
                        active_nodes.add(op)
-                
+                data = graph_to_graph_data((adj_matrix, ops),idx=i, train_loader=train_loader, searchspace=searchspace, args=args, device=device) 
-                data = graph_to_graph_data((adj_matrix, ops)) 
+                i += 1
                if data is None:
                    pbar.update(1)
                    continue
                # with open(flex_graph_path, 'a') as f:
                #     flex_graph = {
                #         'adj_matrix': adj_matrix,
@ -816,18 +857,12 @@ class Dataset(InMemoryDataset):
                        f.write(str(data.edge_attr))
                data_list.append(data)
-                new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9,  random_ratio=0.5)
+                # new_adj, new_ops = generate_flex_adj_mat(ori_nodes=ori_nodes, ori_edges=ori_adj, max_nodes=12, min_nodes=9,  random_ratio=0.5)
-                flex_graph_list.append({
+                # flex_graph_list.append({
-                    'adj_matrix':new_adj.tolist(),
+                #     'adj_matrix':new_adj.tolist(),
-                    'ops': new_ops,
+                #     'ops': new_ops,
-                })
+                # })
-                # with open(flex_graph_path, 'w') as f:
+                # data_list.append(graph_to_graph_data((new_adj, new_ops)))
                #     flex_graph = {
                #         'adj_matrix': new_adj.tolist(),
                #         'ops': new_ops,
                #     }
                #     json.dump(flex_graph, f)
                data_list.append(graph_to_graph_data((new_adj, new_ops)))
                # graph_list.append({
                #     "adj_matrix": adj_matrix,
@ -859,6 +894,7 @@ class Dataset(InMemoryDataset):
                #         "seed": seed,
                #     }for seed, result in results.items()]
                # })
                # i += 1
                pbar.update(1)
        for graph in graph_list:
@ -872,8 +908,8 @@ class Dataset(InMemoryDataset):
                graph['ops'] = ops
        with open(f'nasbench-201-graph.json', 'w') as f:
            json.dump(graph_list, f)
-        with open(flex_graph_path, 'w') as f:
+        # with open(flex_graph_path, 'w') as f:
-            json.dump(flex_graph_list, f)
+            # json.dump(flex_graph_list, f)
        torch.save(self.collate(data_list), self.processed_paths[0])
@ -1148,7 +1184,8 @@ class DataInfos(AbstractDatasetInfos):
            #         ops_type[op] = len(ops_type)
            # len_ops.add(len(ops))
            # graphs.append((adj_matrix, ops))
-        graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json')
+        # graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/flex-nasbench201-graph.json')
        graphs = read_adj_ops_from_json(f'/nfs/data3/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json')
        # check first five graphs
        for i in range(5):
--- a/graph_dit/diffusion_model.py
+++ b/graph_dit/diffusion_model.py
@ -356,7 +356,8 @@ class Graph_DiT(pl.LightningModule):
            to_generate = min(samples_left_to_generate, bs)
            to_save = min(samples_left_to_save, bs)
            chains_save = min(chains_left_to_save, bs)
-            batch_y = test_y_collection[batch_id : batch_id + to_generate]
+            # batch_y = test_y_collection[batch_id : batch_id + to_generate]
            batch_y = torch.ones(to_generate, self.ydim_output, device=self.device)
            cur_sample = self.sample_batch(batch_id, to_generate, batch_y, save_final=to_save,
                                            keep_chain=chains_save, number_chain_steps=self.number_chain_steps)
--- a/graph_dit/exp_201/main.py
+++ b/graph_dit/exp_201/main.py
@ -0,0 +1,82 @@
 # Exploration script: score every NAS-Bench-201 architecture index with
 # get_nasbench201_idx_score, bin the scores into width-10 intervals over
 # [0, 100), and save a bar chart of the per-interval counts to 'barplog.png'.
 import matplotlib.pyplot as plt
 import pandas as pd
 import torch  # needed for torch.device(...) below; previously missing -> NameError
 from nas_201_api import NASBench201API as API
 from naswot.score_networks import get_nasbench201_idx_score
 from naswot import datasets as dt
 from naswot import nasspace
 class Args():
     # Plain attribute container; it is handed to the naswot helpers below
     # (nasspace.get_search_space, dt.get_data, get_nasbench201_idx_score).
     pass
 args = Args()
 args.trainval = True
 args.augtype = 'none'
 args.repeat = 1
 args.score = 'hook_logdet'
 args.sigma = 0.05
 args.nasspace = 'nasbench201'
 args.batch_size = 128
 args.GPU = '0'
 args.dataset = 'cifar10'
 args.api_loc = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
 args.data_loc = '../cifardata/'
 args.seed = 777
 args.init = ''
 args.save_loc = 'results'
 args.save_string = 'naswot'
 args.dropout = False
 args.maxofn = 1
 args.n_samples = 100
 args.n_runs = 500
 args.stem_out_channels = 16
 args.num_stacks = 3
 args.num_modules_per_stack = 3
 args.num_labels = 1
 searchspace = nasspace.get_search_space(args)
 train_loader = dt.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
 # NOTE(review): the device index is hard-coded to GPU 2 while args.GPU is '0' — confirm which is intended.
 device = torch.device('cuda:2')
 # source = '/nfs/data3/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
 # api = API(source)
 # Example list of percentages, accurate to two decimal places
 # percentages = [5.12, 15.78, 25.43, 35.22, 45.99, 55.34, 65.12, 75.68, 85.99, 95.25, 23.45, 12.34, 37.89, 58.67, 64.23, 72.15, 81.76, 99.99, 42.11, 61.58, 77.34, 14.56]
 percentages = []
 len_201 = 15625
 # Compute one score per architecture index in [0, len_201).
 for i in range(len_201):
     percentage = get_nasbench201_idx_score(i, train_loader, searchspace, args, device)
     percentages.append(percentage)
 # Define the interval edges: 0, 10, ..., 100
 # NOTE(review): values outside [0, 100) fall in no bin and are dropped from the
 # counts, yet total_counts below still includes them — confirm the score range
 # returned by get_nasbench201_idx_score matches these bins.
 bins = [i for i in range(0, 101, 10)]
 # Bin the data and count how many values land in each interval
 hist, bin_edges = pd.cut(percentages, bins=bins, right=False, retbins=True, include_lowest=True)
 bin_counts = hist.value_counts().sort_index()
 total_counts = len(percentages)
 percentages_in_bins = (bin_counts / total_counts) * 100
 # Draw the bar chart
 plt.figure(figsize=(10, 6))
 bars = plt.bar(bin_counts.index.astype(str), bin_counts.values, width=0.9, color='skyblue')
 # Annotate each bar with its share of all scored architectures.
 for bar, percentage in zip(bars, percentages_in_bins):
     plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
              f'{percentage:.2f}%', ha='center', va='bottom')
 # Add the title and axis labels
 plt.title('Distribution of Percentages in 10% Intervals')
 plt.xlabel('Percentage Interval')
 plt.ylabel('Count')
 # Rotate the tick labels and write the chart to disk
 plt.xticks(rotation=45)
 plt.savefig('barplog.png')
--- a/graph_dit/nasbench-201-meta.json
+++ b/graph_dit/nasbench-201-meta.json
@ -0,0 +1 @@
 {"source": "nasbench-201", "num_graph": 15625, "n_nodes_per_graph": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "max_n_nodes": 8, "max_n_edges": 8, "node_type_list": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "edge_type_list": [0.6666666666666666, 0.3333333333333333], "valencies": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "active_nodes": ["*", "input", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3", "skip_connect", "none"], "num_active_nodes": 7, "transition_E": [[[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], 
[0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]]}
Author	SHA1	Message	Date
mhz	6d9db64a48	explore the 201 space script	2024-08-21 10:26:02 +02:00
mhz	3950a8438d	set batch_y to 1 and want to test 15625	2024-08-20 22:15:25 +02:00
mhz	1fa2d49c11	set y's points	2024-08-20 21:57:47 +02:00
mhz	3c92e754d3	update the nasbench data	2024-08-20 09:24:39 +02:00
		`@ -0,0 +1 @@`
							{"source": "nasbench-201", "num_graph": 15625, "n_nodes_per_graph": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "max_n_nodes": 8, "max_n_edges": 8, "node_type_list": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "edge_type_list": [0.6666666666666666, 0.3333333333333333], "valencies": [0.125, 0.15, 0.15, 0.15, 0.15, 0.15, 0.125, 0.0], "active_nodes": ["*", "input", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3", "skip_connect", "none"], "num_active_nodes": 7, "transition_E": [[[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[0.5, 0.5], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], [0.7333333333333333, 0.26666666666666666], 
[0.7333333333333333, 0.26666666666666666], [0.5, 0.5], [1.0, 0.0]], [[1.0, 0.0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [1.0, 0.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]]}