No need to load the NAS-Bench-201 API again and again
This commit is contained in:
		| @@ -79,7 +79,7 @@ class DataModule(AbstractDataModule): | ||||
|         source = './NAS-Bench-201-v1_1-096897.pth' | ||||
|         dataset = Dataset(source=source, root=root_path, target_prop=target, transform=None) | ||||
|         self.dataset = dataset | ||||
|         self.api = dataset.api | ||||
|         # self.api = dataset.api | ||||
|  | ||||
|         # if len(self.task.split('-')) == 2: | ||||
|         #     train_index, val_index, test_index, unlabeled_index = self.fixed_split(dataset) | ||||
| @@ -628,12 +628,12 @@ class Dataset(InMemoryDataset): | ||||
|         self.target_prop = target_prop | ||||
|         source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth' | ||||
|         self.source = source | ||||
|         self.api = API(source)  # Initialize NAS-Bench-201 API | ||||
|         print('API loaded') | ||||
|         # self.api = API(source)  # Initialize NAS-Bench-201 API | ||||
|         # print('API loaded') | ||||
|         super().__init__(root, transform, pre_transform, pre_filter) | ||||
|         print(self.processed_paths[0]) #/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth.pt | ||||
|         print('Dataset initialized') | ||||
|         self.data, self.slices = torch.load(self.processed_paths[0]) | ||||
|         print('Dataset initialized') | ||||
|         self.data.edge_attr = self.data.edge_attr.squeeze() | ||||
|         self.data.idx = torch.arange(len(self.data.y)) | ||||
|         print(f"self.data={self.data}, self.slices={self.slices}") | ||||
| @@ -647,82 +647,146 @@ class Dataset(InMemoryDataset): | ||||
|         return [f'{self.source}.pt'] | ||||
|  | ||||
|     def process(self): | ||||
|         def parse_architecture_string(arch_str): | ||||
|             stages = arch_str.split('+') | ||||
|             nodes = ['input'] | ||||
|             edges = [] | ||||
|              | ||||
|             for stage in stages: | ||||
|                 operations = stage.strip('|').split('|') | ||||
|                 for op in operations: | ||||
|                     operation, idx = op.split('~') | ||||
|                     idx = int(idx) | ||||
|                     edges.append((idx, len(nodes)))  # Add edge from idx to the new node | ||||
|                     nodes.append(operation) | ||||
|             nodes.append('output')  # Add the output node | ||||
|             return nodes, edges | ||||
|         source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth' | ||||
|         self.api = API(source) | ||||
|  | ||||
|         def create_graph(nodes, edges): | ||||
|             G = nx.DiGraph() | ||||
|             for i, node in enumerate(nodes): | ||||
|                 G.add_node(i, label=node) | ||||
|             G.add_edges_from(edges) | ||||
|             return G | ||||
|  | ||||
|         def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None): | ||||
|             nodes, edges = parse_architecture_string(arch_str) | ||||
|  | ||||
|             node_labels = [bonds[node] for node in nodes]  # Replace with appropriate encoding if necessary | ||||
|             assert 0 not in node_labels, f'Invalid node label: {node_labels}' | ||||
|             x = torch.LongTensor(node_labels) | ||||
|             print(f'in initialize Dataset, arch_to_Graph x={x}') | ||||
|  | ||||
|             edges_list = [(start, end) for start, end in edges] | ||||
|             edge_type = [bonds[nodes[end]] for start, end in edges]  # Example: using end node type as edge type | ||||
|             edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous() | ||||
|             edge_type = torch.tensor(edge_type, dtype=torch.long) | ||||
|             edge_attr = edge_type.view(-1, 1) | ||||
|  | ||||
|             if target3 is not None: | ||||
|                 y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1) | ||||
|             elif target2 is not None: | ||||
|                 y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1) | ||||
|             else: | ||||
|                 y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1) | ||||
|  | ||||
|             print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}') | ||||
|             data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y) | ||||
|             return data, nodes | ||||
|  | ||||
|         bonds = { | ||||
|             'nor_conv_1x1': 1, | ||||
|             'nor_conv_3x3': 2, | ||||
|             'avg_pool_3x3': 3, | ||||
|             'skip_connect': 4, | ||||
|             'output': 5, | ||||
|             'none': 6, | ||||
|             'input': 7 | ||||
|         } | ||||
|  | ||||
|         # Prepare to process NAS-Bench-201 data | ||||
|         data_list = [] | ||||
|         len_data = len(self.api)  # Number of architectures | ||||
|         with tqdm(total=len_data) as pbar: | ||||
|             for arch_index in range(len_data): | ||||
|                 arch_info = self.api.query_meta_info_by_index(arch_index) | ||||
|                 arch_str = arch_info.arch_str | ||||
|                 sa = np.random.rand()  # Placeholder for synthetic accessibility | ||||
|                 sc = np.random.rand()  # Placeholder for substructure count | ||||
|                 target = np.random.rand()  # Placeholder for target value | ||||
|                 target2 = np.random.rand()  # Placeholder for second target value | ||||
|                 target3 = np.random.rand()  # Placeholder for third target value | ||||
|         len_data = len(self.api) | ||||
|  | ||||
|                 data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3) | ||||
|         def graph_to_graph_data(graph): | ||||
|             ops = graph[1] | ||||
|             adj = graph[0] | ||||
|             nodes = [] | ||||
|             for op in ops: | ||||
|                 nodes.append(op_type[op]) | ||||
|             x = torch.LongTensor(nodes) | ||||
|  | ||||
|             edges_list = [] | ||||
|             edge_type = [] | ||||
|             for start in range(len(ops)): | ||||
|                 for end in range(len(ops)): | ||||
|                     if adj[start][end] == 1: | ||||
|                         edges_list.append((start, end)) | ||||
|                         edge_type.append(1) | ||||
|                         edges_list.append((end, start)) | ||||
|                         edge_type.append(1) | ||||
|              | ||||
|             edge_index = torch.tensor(edges_list, dtype=torch.long).t() | ||||
|             edge_type = torch.tensor(edge_type, dtype=torch.long) | ||||
|             edge_attr = edge_type | ||||
|             y = torch.tensor([0], dtype=torch.float).view(1, -1) | ||||
|             data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i) | ||||
|             return data | ||||
|         graph_list = [] | ||||
|  | ||||
|         with tqdm(total = len_data) as pbar: | ||||
|             active_nodes = set() | ||||
|             for i in range(len_data): | ||||
|                 arch_info = self.api.query_meta_info_by_index(i) | ||||
|                 nodes, edges = parse_architecture_string(arch_info.arch_str) | ||||
|                 adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges) | ||||
|                 for op in ops: | ||||
|                     if op not in active_nodes: | ||||
|                         active_nodes.add(op) | ||||
|                 graph_list.append({ | ||||
|                     "adj_matrix": adj_matrix, | ||||
|                     "ops": ops, | ||||
|                     "idx": i | ||||
|                 }) | ||||
|                 data = graph_to_graph_data((adj_matrix, ops))  | ||||
|                 data_list.append(data) | ||||
|                 pbar.update(1) | ||||
|  | ||||
|          | ||||
|         for graph in graph_list: | ||||
|             adj_matrix = graph['adj_matrix'] | ||||
|             if isinstance(adj_matrix, np.ndarray): | ||||
|                 adj_matrix = adj_matrix.tolist() | ||||
|                 graph['adj_matrix'] = adj_matrix | ||||
|             ops = graph['ops'] | ||||
|             if isinstance(ops, np.ndarray): | ||||
|                 ops = ops.tolist() | ||||
|                 graph['ops'] = ops | ||||
|         with open(f'nasbench-201-graph.json', 'w') as f: | ||||
|             json.dump(graph_list, f) | ||||
|              | ||||
|         torch.save(self.collate(data_list), self.processed_paths[0]) | ||||
|  | ||||
|         # def parse_architecture_string(arch_str): | ||||
|         #     stages = arch_str.split('+') | ||||
|         #     nodes = ['input'] | ||||
|         #     edges = [] | ||||
|              | ||||
|         #     for stage in stages: | ||||
|         #         operations = stage.strip('|').split('|') | ||||
|         #         for op in operations: | ||||
|         #             operation, idx = op.split('~') | ||||
|         #             idx = int(idx) | ||||
|         #             edges.append((idx, len(nodes)))  # Add edge from idx to the new node | ||||
|         #             nodes.append(operation) | ||||
|         #     nodes.append('output')  # Add the output node | ||||
|         #     return nodes, edges | ||||
|  | ||||
|         # def create_graph(nodes, edges): | ||||
|         #     G = nx.DiGraph() | ||||
|         #     for i, node in enumerate(nodes): | ||||
|         #         G.add_node(i, label=node) | ||||
|         #     G.add_edges_from(edges) | ||||
|         #     return G | ||||
|  | ||||
|         # def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None): | ||||
|         #     nodes, edges = parse_architecture_string(arch_str) | ||||
|  | ||||
|         #     node_labels = [bonds[node] for node in nodes]  # Replace with appropriate encoding if necessary | ||||
|         #     assert 0 not in node_labels, f'Invalid node label: {node_labels}' | ||||
|         #     x = torch.LongTensor(node_labels) | ||||
|         #     print(f'in initialize Dataset, arch_to_Graph x={x}') | ||||
|  | ||||
|         #     edges_list = [(start, end) for start, end in edges] | ||||
|         #     edge_type = [bonds[nodes[end]] for start, end in edges]  # Example: using end node type as edge type | ||||
|         #     edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous() | ||||
|         #     edge_type = torch.tensor(edge_type, dtype=torch.long) | ||||
|         #     edge_attr = edge_type.view(-1, 1) | ||||
|  | ||||
|         #     if target3 is not None: | ||||
|         #         y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1) | ||||
|         #     elif target2 is not None: | ||||
|         #         y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1) | ||||
|         #     else: | ||||
|         #         y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1) | ||||
|  | ||||
|         #     print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}') | ||||
|         #     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y) | ||||
|         #     return data, nodes | ||||
|  | ||||
|         # bonds = { | ||||
|         #     'nor_conv_1x1': 1, | ||||
|         #     'nor_conv_3x3': 2, | ||||
|         #     'avg_pool_3x3': 3, | ||||
|         #     'skip_connect': 4, | ||||
|         #     'output': 5, | ||||
|         #     'none': 6, | ||||
|         #     'input': 7 | ||||
|         # } | ||||
|  | ||||
|         # # Prepare to process NAS-Bench-201 data | ||||
|         # data_list = [] | ||||
|         # len_data = len(self.api)  # Number of architectures | ||||
|         # with tqdm(total=len_data) as pbar: | ||||
|         #     for arch_index in range(len_data): | ||||
|         #         arch_info = self.api.query_meta_info_by_index(arch_index) | ||||
|         #         arch_str = arch_info.arch_str | ||||
|         #         sa = np.random.rand()  # Placeholder for synthetic accessibility | ||||
|         #         sc = np.random.rand()  # Placeholder for substructure count | ||||
|         #         target = np.random.rand()  # Placeholder for target value | ||||
|         #         target2 = np.random.rand()  # Placeholder for second target value | ||||
|         #         target3 = np.random.rand()  # Placeholder for third target value | ||||
|  | ||||
|         #         data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3) | ||||
|         #         data_list.append(data) | ||||
|         #         pbar.update(1) | ||||
|  | ||||
|         # torch.save(self.collate(data_list), self.processed_paths[0]) | ||||
|  | ||||
| class Dataset_origin(InMemoryDataset): | ||||
|     def __init__(self, source, root, target_prop=None, | ||||
|                  transform=None, pre_transform=None, pre_filter=None): | ||||
| @@ -841,7 +905,7 @@ class DataInfos(AbstractDatasetInfos): | ||||
|         self.task = task_name | ||||
|         self.task_type = tasktype_dict.get(task_name, "regression") | ||||
|         self.ensure_connected = cfg.model.ensure_connected | ||||
|         self.api = dataset.api | ||||
|         # self.api = dataset.api | ||||
|  | ||||
|         datadir = cfg.dataset.datadir | ||||
|  | ||||
| @@ -853,20 +917,34 @@ class DataInfos(AbstractDatasetInfos): | ||||
|         ops_type = {} | ||||
|         len_ops = set() | ||||
|         # api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth') | ||||
|         for i in range(length): | ||||
|             arch_info = self.api.query_meta_info_by_index(i) | ||||
|             nodes, edges = parse_architecture_string(arch_info.arch_str) | ||||
|             adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)     | ||||
|  | ||||
|  | ||||
|         def read_adj_ops_from_json(filename): | ||||
|             with open(filename, 'r') as json_file: | ||||
|                 data = json.load(json_file) | ||||
|  | ||||
|             adj_ops_pairs = [] | ||||
|             for item in data: | ||||
|                 adj_matrix = np.array(item['adjacency_matrix']) | ||||
|                 ops = item['operations'] | ||||
|                 adj_ops_pairs.append((adj_matrix, ops)) | ||||
|              | ||||
|             return adj_ops_pairs | ||||
|         # for i in range(length): | ||||
|         #     arch_info = self.api.query_meta_info_by_index(i) | ||||
|         #     nodes, edges = parse_architecture_string(arch_info.arch_str) | ||||
|         #     adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)     | ||||
|             # if i < 5: | ||||
|             #     print("Adjacency Matrix:") | ||||
|             #     print(adj_matrix) | ||||
|             #     print("Operations List:") | ||||
|             #     print(ops) | ||||
|             for op in ops: | ||||
|                 if op not in ops_type: | ||||
|                     ops_type[op] = len(ops_type) | ||||
|             len_ops.add(len(ops)) | ||||
|             graphs.append((adj_matrix, ops)) | ||||
|             # for op in ops: | ||||
|             #     if op not in ops_type: | ||||
|             #         ops_type[op] = len(ops_type) | ||||
|             # len_ops.add(len(ops)) | ||||
|             # graphs.append((adj_matrix, ops)) | ||||
|         graphs = read_adj_ops_from_json(f'nasbench-201.meta.json') | ||||
|  | ||||
|         # check first five graphs | ||||
|         for i in range(5): | ||||
| @@ -879,13 +957,13 @@ class DataInfos(AbstractDatasetInfos): | ||||
|         self.max_n_nodes = meta_dict['max_n_nodes'] | ||||
|         self.original_max_n_nodes = meta_dict['max_n_nodes'] | ||||
|         self.n_nodes = torch.Tensor(meta_dict['n_nodes_per_graph']) | ||||
|         self.edge_types = torch.Tensor(meta_dict['edge_type_dist']) | ||||
|         self.edge_types = torch.Tensor(meta_dict['edge_type_list']) | ||||
|         self.transition_E = torch.Tensor(meta_dict['transition_E']) | ||||
|  | ||||
|         self.node_decoder = meta_dict['active_nodes'] | ||||
|         node_types = torch.Tensor(meta_dict['node_type_dist']) | ||||
|         node_types = torch.Tensor(meta_dict['node_type_list']) | ||||
|         active_index = (node_types > 0).nonzero().squeeze() | ||||
|         self.node_types = torch.Tensor(meta_dict['node_type_dist'])[active_index] | ||||
|         self.node_types = torch.Tensor(meta_dict['node_type_list'])[active_index] | ||||
|         self.nodes_dist = DistributionNodes(self.n_nodes) | ||||
|         self.active_index = active_index | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user