Compare commits
13 commits: main...f5911be781

| SHA1 |
|---|
| f5911be781 |
| be8bb16f61 |
| 0fc6f6e686 |
| d57575586d |
| 7274b3f606 |
| 66fe70028e |
| df26eef77c |
| 222470a43c |
| a7f7010da7 |
| 14186fa97f |
| a222c514d9 |
| 062a27b83f |
| 0c7c525680 |
@@ -116,7 +116,7 @@ class AbstractDatasetInfos:
     def compute_input_output_dims(self, datamodule):
         example_batch = datamodule.example_batch()
         example_batch_x = torch.nn.functional.one_hot(example_batch.x, num_classes=118).float()[:, self.active_index]
-        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=10).float()
+        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=2).float()

         self.input_dims = {'X': example_batch_x.size(1),
                            'E': example_batch_edge_attr.size(1),
@@ -127,4 +127,19 @@ class AbstractDatasetInfos:
         print('input dims')
         print(self.input_dims)
         print('output dims')
+        print(self.output_dims)
+    def compute_graph_input_output_dims(self, datamodule):
+        example_batch = datamodule.example_batch()
+        example_batch_x = torch.nn.functional.one_hot(example_batch.x, num_classes=8).float()[:, self.active_index]
+        example_batch_edge_attr = torch.nn.functional.one_hot(example_batch.edge_attr, num_classes=2).float()
+
+        self.input_dims = {'X': example_batch_x.size(1),
+                           'E': example_batch_edge_attr.size(1),
+                           'y': example_batch['y'].size(1)}
+        self.output_dims = {'X': example_batch_x.size(1),
+                            'E': example_batch_edge_attr.size(1),
+                            'y': example_batch['y'].size(1)}
+        print('input dims')
+        print(self.input_dims)
+        print('output dims')
         print(self.output_dims)
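
The only functional change in the first hunk is the edge one-hot width (10 → 2): NAS-Bench-201 cell graphs carry a single real edge type plus "no edge", so `input_dims['E']` drops to 2. A minimal sketch with a toy tensor (not the real `example_batch`):

```python
import torch

# Toy edge labels: 0 = no edge, 1 = a directed cell connection — the only
# real edge type in a NAS-Bench-201 cell graph.
edge_attr = torch.tensor([1, 1, 0, 1])

# With num_classes=2, each edge becomes a 2-dim one-hot row, so
# example_batch_edge_attr.size(1) — and hence input_dims['E'] — is 2.
e = torch.nn.functional.one_hot(edge_attr, num_classes=2).float()
print(e.shape)  # torch.Size([4, 2])
```
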
@@ -39,6 +39,16 @@ op_to_atom = {
     'none': 'S', # Sulfur for no operation
     'output': 'He' # Helium for output
 }
+
+op_type = {
+    'nor_conv_1x1': 1,
+    'nor_conv_3x3': 2,
+    'avg_pool_3x3': 3,
+    'skip_connect': 4,
+    'output': 5,
+    'none': 6,
+    'input': 7
+}
 class DataModule(AbstractDataModule):
     def __init__(self, cfg):
         self.datadir = cfg.dataset.datadir
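
For reference, how the new `op_type` table is used downstream: operation names from a parsed architecture are mapped to the integer codes that become node labels. A sketch with a toy `ops` list (the real one comes from the NAS-Bench-201 API):

```python
op_type = {
    'nor_conv_1x1': 1, 'nor_conv_3x3': 2, 'avg_pool_3x3': 3,
    'skip_connect': 4, 'output': 5, 'none': 6, 'input': 7,
}

# A toy operation sequence for one cell; graph_to_graph_data() later wraps
# these codes in a torch.LongTensor as node features.
ops = ['input', 'nor_conv_3x3', 'skip_connect', 'output']
codes = [op_type[op] for op in ops]
print(codes)  # [7, 2, 4, 5]
```
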
@@ -50,12 +60,12 @@ class DataModule(AbstractDataModule):

     def prepare_data(self) -> None:
         target = getattr(self.cfg.dataset, 'guidance_target', None)
-        print("target", target)
+        print("target", target) # nasbench-201
         # try:
         #     base_path = pathlib.Path(os.path.realpath(__file__)).parents[2]
         # except NameError:
         #     base_path = pathlib.Path(os.getcwd()).parent[2]
-        base_path = '/home/stud/hanzhang/Graph-Dit'
+        base_path = '/home/stud/hanzhang/nasbenchDiT'
         root_path = os.path.join(base_path, self.datadir)
         self.root_path = root_path

@@ -68,13 +78,16 @@ class DataModule(AbstractDataModule):
         # Dataset has target property, root path, and transform
         source = './NAS-Bench-201-v1_1-096897.pth'
         dataset = Dataset(source=source, root=root_path, target_prop=target, transform=None)
+        self.dataset = dataset
+        # self.api = dataset.api

         # if len(self.task.split('-')) == 2:
         #     train_index, val_index, test_index, unlabeled_index = self.fixed_split(dataset)
         # else:
         train_index, val_index, test_index, unlabeled_index = self.random_data_split(dataset)

-        self.train_index, self.val_index, self.test_index, self.unlabeled_index = train_index, val_index, test_index, unlabeled_index
+        self.train_index, self.val_index, self.test_index, self.unlabeled_index = (
+            train_index, val_index, test_index, unlabeled_index)
         train_index, val_index, test_index, unlabeled_index = torch.LongTensor(train_index), torch.LongTensor(val_index), torch.LongTensor(test_index), torch.LongTensor(unlabeled_index)
         if len(unlabeled_index) > 0:
             train_index = torch.cat([train_index, unlabeled_index], dim=0)
@@ -175,6 +188,27 @@ class DataModule(AbstractDataModule):
         smiles = Chem.MolToSmiles(mol)
         return smiles

+    def get_train_graphs(self):
+        train_graphs = []
+        test_graphs = []
+        for graph in self.train_dataset:
+            train_graphs.append(graph)
+        for graph in self.test_dataset:
+            test_graphs.append(graph)
+        return train_graphs, test_graphs
+
+
+    # def get_train_smiles(self):
+    #     filename = f'{self.task}.csv.gz'
+    #     df = pd.read_csv(f'{self.root_path}/raw/{filename}')
+    #     df_test = df.iloc[self.test_index]
+    #     df = df.iloc[self.train_index]
+    #     smiles_list = df['smiles'].tolist()
+    #     smiles_list_test = df_test['smiles'].tolist()
+    #     smiles_list = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiles_list]
+    #     smiles_list_test = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiles_list_test]
+    #     return smiles_list, smiles_list_test
+
     def get_train_smiles(self):
         train_smiles = []
         test_smiles = []
@@ -319,6 +353,121 @@ class DataModule_original(AbstractDataModule):
     def test_dataloader(self):
         return self.test_loader

+def new_graphs_to_json(graphs, filename):
+    source_name = "nasbench-201"
+    num_graph = len(graphs)
+
+    node_name_list = []
+    node_count_list = []
+    node_name_list.append('*')
+
+    for op_name in op_type:
+        node_name_list.append(op_name)
+        node_count_list.append(0)
+
+    node_count_list.append(0)
+    n_nodes_per_graph = [0] * num_graph
+    edge_count_list = [0, 0]
+    valencies = [0] * (len(op_type) + 1)
+    transition_E = np.zeros((len(op_type) + 1, len(op_type) + 1, 2))
+
+    n_node_list = []
+    n_edge_list = []
+
+    for graph in graphs:
+        ops = graph[1]
+        adj = graph[0]
+
+        n_node = len(ops)
+        n_edge = len(ops)
+        n_node_list.append(n_node)
+        n_edge_list.append(n_edge)
+
+        n_nodes_per_graph[n_node] += 1
+        cur_node_count_arr = np.zeros(len(op_type) + 1)
+
+        for op in ops:
+            node = op
+            # if node == '*':
+            #     node_count_list[-1] += 1
+            #     cur_node_count_arr[-1] += 1
+            # else:
+            node_count_list[node] += 1
+            cur_node_count_arr[node] += 1
+            try:
+                valencies[node] += 1
+            except:
+                print('int(op_type[node])', int(node))
+
+        transition_E_temp = np.zeros((len(op_type) + 1, len(op_type) + 1, 2))
+        for i in range(n_node):
+            for j in range(n_node):
+                if i == j or adj[i][j] == 0:
+                    continue
+                start_node, end_node = i, j
+
+                start_index = ops[start_node]
+                end_index = ops[end_node]
+                bond_index = 1
+                edge_count_list[bond_index] += 2
+
+                transition_E[start_index, end_index, bond_index] += 2
+                transition_E[end_index, start_index, bond_index] += 2
+                transition_E_temp[start_index, end_index, bond_index] += 2
+                transition_E_temp[end_index, start_index, bond_index] += 2
+
+        edge_count_list[0] += n_node * (n_node - 1) - n_edge * 2
+        cur_tot_edge = cur_node_count_arr.reshape(-1,1) * cur_node_count_arr.reshape(1,-1) * 2
+        # print(f"cur_tot_edge={cur_tot_edge}, shape: {cur_tot_edge.shape}")
+        cur_tot_edge = cur_tot_edge - np.diag(cur_node_count_arr) * 2
+        transition_E[:, :, 0] += cur_tot_edge - transition_E_temp.sum(axis=-1)
+        assert (cur_tot_edge > transition_E_temp.sum(axis=-1)).sum() >= 0
+
+    n_nodes_per_graph = np.array(n_nodes_per_graph) / np.sum(n_nodes_per_graph)
+    n_nodes_per_graph = n_nodes_per_graph.tolist()[:51]
+
+    node_count_list = np.array(node_count_list) / np.sum(node_count_list)
+    print('processed meta info: ------', filename, '------')
+    print('len node_count_list', len(node_count_list))
+    print('len node_name_list', len(node_name_list))
+    active_nodes = np.array(node_name_list)[node_count_list > 0]
+    active_nodes = active_nodes.tolist()
+    node_count_list = node_count_list.tolist()
+
+    edge_count_list = np.array(edge_count_list) / np.sum(edge_count_list)
+    edge_count_list = edge_count_list.tolist()
+    valencies = np.array(valencies) / np.sum(valencies)
+    valencies = valencies.tolist()
+
+    no_edge = np.sum(transition_E, axis=-1) == 0
+    first_elt = transition_E[:, :, 0]
+    first_elt[no_edge] = 1
+    transition_E[:, :, 0] = first_elt
+
+    transition_E = transition_E / np.sum(transition_E, axis=-1, keepdims=True)
+
+    meta_dict = {
+        'source': source_name,
+        'num_graph': num_graph,
+        'n_nodes_per_graph': n_nodes_per_graph,
+        'max_n_nodes': max(n_node_list),
+        'max_n_edges': max(n_edge_list),
+        'node_type_list': node_count_list,
+        'edge_type_list': edge_count_list,
+        'valencies': valencies,
+        'active_nodes': active_nodes,
+        'num_active_nodes': len(active_nodes),
+        'transition_E': transition_E.tolist(),
+    }
+
+    with open(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-meta.json', 'w') as f:
+        json.dump(meta_dict, f)
+
+    return meta_dict
+
+
+
+
 def graphs_to_json(graphs, filename):
     bonds = {
         'nor_conv_1x1': 1,
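
The core of `new_graphs_to_json` is the `transition_E` tally: for every ordered pair of node types it counts present edges (channel 1) and, by complement, absent edges (channel 0), then normalizes each pair into a distribution. A self-contained sketch of the channel-1 counting on a toy 3-node DAG (assuming K = 7 op types, matching `op_type` above):

```python
import numpy as np

K = 7                                 # number of op types in op_type
ops = [7, 2, 5]                       # integer op codes of the 3 nodes
adj = np.array([[0, 1, 0],
                [0, 0, 1],
                [0, 0, 0]])

transition_E = np.zeros((K + 1, K + 1, 2))
for i in range(len(ops)):
    for j in range(len(ops)):
        if i == j or adj[i][j] == 0:
            continue
        # each directed edge is added symmetrically with weight 2,
        # exactly as in the hunk above
        transition_E[ops[i], ops[j], 1] += 2
        transition_E[ops[j], ops[i], 1] += 2

print(transition_E[:, :, 1].sum())  # 8.0: two edges, each counted 4 times
```
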
@@ -466,7 +615,7 @@ def graphs_to_json(graphs, filename):
         'atom_type_dist': atom_count_list,
         'bond_type_dist': bond_count_list,
         'valencies': valencies,
-        'active_atoms': [atom_name_list[i] for i in range(118) if atom_count_list[i] > 0],
+        'active_nodes': [atom_name_list[i] for i in range(118) if atom_count_list[i] > 0],
         'num_atom_type': len([atom_name_list[i] for i in range(118) if atom_count_list[i] > 0]),
         'transition_E': transition_E.tolist(),
     }
@@ -477,14 +626,17 @@ def graphs_to_json(graphs, filename):
 class Dataset(InMemoryDataset):
     def __init__(self, source, root, target_prop=None, transform=None, pre_transform=None, pre_filter=None):
         self.target_prop = target_prop
-        source = '/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+        source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
         self.source = source
-        self.api = API(source) # Initialize NAS-Bench-201 API
-        print('API loaded')
+        # self.api = API(source) # Initialize NAS-Bench-201 API
+        # print('API loaded')
         super().__init__(root, transform, pre_transform, pre_filter)
-        print('Dataset initialized')
-        print(self.processed_paths[0])
+        print(self.processed_paths[0]) #/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth.pt
         self.data, self.slices = torch.load(self.processed_paths[0])
+        print('Dataset initialized')
+        self.data.edge_attr = self.data.edge_attr.squeeze()
+        self.data.idx = torch.arange(len(self.data.y))
+        print(f"self.data={self.data}, self.slices={self.slices}")

     @property
     def raw_file_names(self):
@@ -495,82 +647,172 @@ class Dataset(InMemoryDataset):
         return [f'{self.source}.pt']

     def process(self):
-        def parse_architecture_string(arch_str):
-            stages = arch_str.split('+')
-            nodes = ['input']
-            edges = []
-
-            for stage in stages:
-                operations = stage.strip('|').split('|')
-                for op in operations:
-                    operation, idx = op.split('~')
-                    idx = int(idx)
-                    edges.append((idx, len(nodes))) # Add edge from idx to the new node
-                    nodes.append(operation)
-            nodes.append('output') # Add the output node
-            return nodes, edges
-
-        def create_graph(nodes, edges):
-            G = nx.DiGraph()
-            for i, node in enumerate(nodes):
-                G.add_node(i, label=node)
-            G.add_edges_from(edges)
-            return G
-
-        def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
-            nodes, edges = parse_architecture_string(arch_str)
-
-            node_labels = [bonds[node] for node in nodes] # Replace with appropriate encoding if necessary
-            assert 0 not in node_labels, f'Invalid node label: {node_labels}'
-            x = torch.LongTensor(node_labels)
-            print(f'in initialize Dataset, arch_to_Graph x={x}')
-
-            edges_list = [(start, end) for start, end in edges]
-            edge_type = [bonds[nodes[end]] for start, end in edges] # Example: using end node type as edge type
-            edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous()
-            edge_type = torch.tensor(edge_type, dtype=torch.long)
-            edge_attr = edge_type.view(-1, 1)
-
-            if target3 is not None:
-                y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1)
-            elif target2 is not None:
-                y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1)
-            else:
-                y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1)
-
-            print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
-            data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
-            return data, nodes
-
-        bonds = {
-            'nor_conv_1x1': 1,
-            'nor_conv_3x3': 2,
-            'avg_pool_3x3': 3,
-            'skip_connect': 4,
-            'output': 5,
-            'none': 6,
-            'input': 7
-        }
-
-        # Prepare to process NAS-Bench-201 data
+        source = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
+        self.api = API(source)
+
         data_list = []
-        len_data = len(self.api) # Number of architectures
-        with tqdm(total=len_data) as pbar:
-            for arch_index in range(len_data):
-                arch_info = self.api.query_meta_info_by_index(arch_index)
-                arch_str = arch_info.arch_str
-                sa = np.random.rand() # Placeholder for synthetic accessibility
-                sc = np.random.rand() # Placeholder for substructure count
-                target = np.random.rand() # Placeholder for target value
-                target2 = np.random.rand() # Placeholder for second target value
-                target3 = np.random.rand() # Placeholder for third target value
-
-                data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
+        len_data = len(self.api)
+        def graph_to_graph_data(graph):
+            ops = graph[1]
+            adj = graph[0]
+            nodes = []
+            for op in ops:
+                nodes.append(op_type[op])
+            x = torch.LongTensor(nodes)
+
+            edges_list = []
+            edge_type = []
+            for start in range(len(ops)):
+                for end in range(len(ops)):
+                    if adj[start][end] == 1:
+                        edges_list.append((start, end))
+                        edge_type.append(1)
+                        edges_list.append((end, start))
+                        edge_type.append(1)
+
+            edge_index = torch.tensor(edges_list, dtype=torch.long).t()
+            edge_type = torch.tensor(edge_type, dtype=torch.long)
+            edge_attr = edge_type
+            y = torch.tensor([0], dtype=torch.float).view(1, -1)
+            data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i)
+            return data
+        graph_list = []
+
+        with tqdm(total = len_data) as pbar:
+            active_nodes = set()
+            for i in range(len_data):
+                arch_info = self.api.query_meta_info_by_index(i)
+                results = self.api.query_by_index(i, 'cifar100')
+                nodes, edges = parse_architecture_string(arch_info.arch_str)
+                adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
+                for op in ops:
+                    if op not in active_nodes:
+                        active_nodes.add(op)
+
+                graph_list.append({
+                    "adj_matrix": adj_matrix,
+                    "ops": ops,
+                    "idx": i,
+                    "train": [{
+                        "iepoch": result.get_train()['iepoch'],
+                        "loss": result.get_train()['loss'],
+                        "accuracy": result.get_train()['accuracy'],
+                        "cur_time": result.get_train()['cur_time'],
+                        "all_time": result.get_train()['all_time'],
+                        "seed": seed,
+                    }for seed, result in results.items()],
+                    "valid": [{
+                        "iepoch": result.get_eval('x-valid')['iepoch'],
+                        "loss": result.get_eval('x-valid')['loss'],
+                        "accuracy": result.get_eval('x-valid')['accuracy'],
+                        "cur_time": result.get_eval('x-valid')['cur_time'],
+                        "all_time": result.get_eval('x-valid')['all_time'],
+                        "seed": seed,
+                    }for seed, result in results.items()],
+                    "test": [{
+                        "iepoch": result.get_eval('x-test')['iepoch'],
+                        "loss": result.get_eval('x-test')['loss'],
+                        "accuracy": result.get_eval('x-test')['accuracy'],
+                        "cur_time": result.get_eval('x-test')['cur_time'],
+                        "all_time": result.get_eval('x-test')['all_time'],
+                        "seed": seed,
+                    }for seed, result in results.items()]
+                })
+                data = graph_to_graph_data((adj_matrix, ops))
                 data_list.append(data)
                 pbar.update(1)

+        for graph in graph_list:
+            adj_matrix = graph['adj_matrix']
+            if isinstance(adj_matrix, np.ndarray):
+                adj_matrix = adj_matrix.tolist()
+            graph['adj_matrix'] = adj_matrix
+            ops = graph['ops']
+            if isinstance(ops, np.ndarray):
+                ops = ops.tolist()
+            graph['ops'] = ops
+        with open(f'nasbench-201-graph.json', 'w') as f:
+            json.dump(graph_list, f)
+
         torch.save(self.collate(data_list), self.processed_paths[0])
+
+        # def parse_architecture_string(arch_str):
+        #     stages = arch_str.split('+')
+        #     nodes = ['input']
+        #     edges = []
+
+        #     for stage in stages:
+        #         operations = stage.strip('|').split('|')
+        #         for op in operations:
+        #             operation, idx = op.split('~')
+        #             idx = int(idx)
+        #             edges.append((idx, len(nodes))) # Add edge from idx to the new node
+        #             nodes.append(operation)
+        #     nodes.append('output') # Add the output node
+        #     return nodes, edges
+
+        # def create_graph(nodes, edges):
+        #     G = nx.DiGraph()
+        #     for i, node in enumerate(nodes):
+        #         G.add_node(i, label=node)
+        #     G.add_edges_from(edges)
+        #     return G
+
+        # def arch_to_graph(arch_str, sa, sc, target, target2=None, target3=None):
+        #     nodes, edges = parse_architecture_string(arch_str)
+
+        #     node_labels = [bonds[node] for node in nodes] # Replace with appropriate encoding if necessary
+        #     assert 0 not in node_labels, f'Invalid node label: {node_labels}'
+        #     x = torch.LongTensor(node_labels)
+        #     print(f'in initialize Dataset, arch_to_Graph x={x}')
+
+        #     edges_list = [(start, end) for start, end in edges]
+        #     edge_type = [bonds[nodes[end]] for start, end in edges] # Example: using end node type as edge type
+        #     edge_index = torch.tensor(edges_list, dtype=torch.long).t().contiguous()
+        #     edge_type = torch.tensor(edge_type, dtype=torch.long)
+        #     edge_attr = edge_type.view(-1, 1)
+
+        #     if target3 is not None:
+        #         y = torch.tensor([sa, sc, target, target2, target3], dtype=torch.float).view(1, -1)
+        #     elif target2 is not None:
+        #         y = torch.tensor([sa, sc, target, target2], dtype=torch.float).view(1, -1)
+        #     else:
+        #         y = torch.tensor([sa, sc, target], dtype=torch.float).view(1, -1)
+
+        #     print(f'in initialize Dataset, Data_init, x={x}, y={y}, edge_index={edge_index}, edge_attr={edge_attr}')
+        #     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
+        #     return data, nodes
+
+        # bonds = {
+        #     'nor_conv_1x1': 1,
+        #     'nor_conv_3x3': 2,
+        #     'avg_pool_3x3': 3,
+        #     'skip_connect': 4,
+        #     'output': 5,
+        #     'none': 6,
+        #     'input': 7
+        # }
+
+        # # Prepare to process NAS-Bench-201 data
+        # data_list = []
+        # len_data = len(self.api) # Number of architectures
+        # with tqdm(total=len_data) as pbar:
+        #     for arch_index in range(len_data):
+        #         arch_info = self.api.query_meta_info_by_index(arch_index)
+        #         arch_str = arch_info.arch_str
+        #         sa = np.random.rand() # Placeholder for synthetic accessibility
+        #         sc = np.random.rand() # Placeholder for substructure count
+        #         target = np.random.rand() # Placeholder for target value
+        #         target2 = np.random.rand() # Placeholder for second target value
+        #         target3 = np.random.rand() # Placeholder for third target value
+
+        #         data, active_nodes = arch_to_graph(arch_str, sa, sc, target, target2, target3)
+        #         data_list.append(data)
+        #         pbar.update(1)
+
+        # torch.save(self.collate(data_list), self.processed_paths[0])

 class Dataset_origin(InMemoryDataset):
     def __init__(self, source, root, target_prop=None,
                  transform=None, pre_transform=None, pre_filter=None):
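
`process()` still leans on the module-level `parse_architecture_string` helper. For readers unfamiliar with the NAS-Bench-201 string format, here is a standalone copy of that parsing logic run on a sample architecture string (same algorithm as the code commented out above; the sample string is illustrative):

```python
def parse_architecture_string(arch_str):
    stages = arch_str.split('+')
    nodes = ['input']
    edges = []
    for stage in stages:
        operations = stage.strip('|').split('|')
        for op in operations:
            operation, idx = op.split('~')
            edges.append((int(idx), len(nodes)))  # edge from idx to the new node
            nodes.append(operation)
    nodes.append('output')
    return nodes, edges

arch = '|nor_conv_3x3~0|+|skip_connect~0|nor_conv_1x1~1|+|skip_connect~0|none~1|avg_pool_3x3~2|'
nodes, edges = parse_architecture_string(arch)
print(nodes)  # ['input', 'nor_conv_3x3', 'skip_connect', ..., 'output']
print(edges)  # [(0, 1), (0, 2), (1, 3), (0, 4), (1, 5), (2, 6)]
```
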
@@ -676,7 +918,7 @@ def create_adj_matrix_and_ops(nodes, edges):
         adj_matrix[src][dst] = 1
     return adj_matrix, nodes
 class DataInfos(AbstractDatasetInfos):
-    def __init__(self, datamodule, cfg):
+    def __init__(self, datamodule, cfg, dataset):
         tasktype_dict = {
             'hiv_b': 'classification',
             'bace_b': 'classification',
@@ -689,6 +931,7 @@ class DataInfos(AbstractDatasetInfos):
         self.task = task_name
         self.task_type = tasktype_dict.get(task_name, "regression")
         self.ensure_connected = cfg.model.ensure_connected
+        # self.api = dataset.api

         datadir = cfg.dataset.datadir

@@ -699,36 +942,55 @@ class DataInfos(AbstractDatasetInfos):
         length = 15625
         ops_type = {}
         len_ops = set()
-        api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth')
-        for i in range(length):
-            arch_info = api.query_meta_info_by_index(i)
-            nodes, edges = parse_architecture_string(arch_info.arch_str)
-            adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
-            if i < 5:
-                print("Adjacency Matrix:")
-                print(adj_matrix)
-                print("Operations List:")
-                print(ops)
-            for op in ops:
-                if op not in ops_type:
-                    ops_type[op] = len(ops_type)
-            len_ops.add(len(ops))
-            graphs.append((adj_matrix, ops))
-
-        meta_dict = graphs_to_json(graphs, 'nasbench-201')
-
+        # api = API('/home/stud/hanzhang/Graph-DiT/graph_dit/NAS-Bench-201-v1_1-096897.pth')
+        def read_adj_ops_from_json(filename):
+            with open(filename, 'r') as json_file:
+                data = json.load(json_file)
+
+            adj_ops_pairs = []
+            for item in data:
+                adj_matrix = np.array(item['adj_matrix'])
+                ops = item['ops']
+                ops = [op_type[op] for op in ops]
+                adj_ops_pairs.append((adj_matrix, ops))
+
+            return adj_ops_pairs
+        # for i in range(length):
+        #     arch_info = self.api.query_meta_info_by_index(i)
+        #     nodes, edges = parse_architecture_string(arch_info.arch_str)
+        #     adj_matrix, ops = create_adj_matrix_and_ops(nodes, edges)
+        #     if i < 5:
+        #         print("Adjacency Matrix:")
+        #         print(adj_matrix)
+        #         print("Operations List:")
+        #         print(ops)
+        #     for op in ops:
+        #         if op not in ops_type:
+        #             ops_type[op] = len(ops_type)
+        #     len_ops.add(len(ops))
+        #     graphs.append((adj_matrix, ops))
+        graphs = read_adj_ops_from_json(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json')
+
+        # check first five graphs
+        for i in range(5):
+            print(f'graph {i} : {graphs[i]}')
+        print(f'ops_type: {ops_type}')
+
+        meta_dict = new_graphs_to_json(graphs, 'nasbench-201')
         self.base_path = base_path
-        self.active_atoms = meta_dict['active_atoms']
-        self.max_n_nodes = meta_dict['max_node']
-        self.original_max_n_nodes = meta_dict['max_node']
-        self.n_nodes = torch.Tensor(meta_dict['n_atoms_per_mol_dist'])
-        self.edge_types = torch.Tensor(meta_dict['bond_type_dist'])
+        self.active_nodes = meta_dict['active_nodes']
+        self.max_n_nodes = meta_dict['max_n_nodes']
+        self.original_max_n_nodes = meta_dict['max_n_nodes']
+        self.n_nodes = torch.Tensor(meta_dict['n_nodes_per_graph'])
+        self.edge_types = torch.Tensor(meta_dict['edge_type_list'])
         self.transition_E = torch.Tensor(meta_dict['transition_E'])

-        self.atom_decoder = meta_dict['active_atoms']
-        node_types = torch.Tensor(meta_dict['atom_type_dist'])
+        self.node_decoder = meta_dict['active_nodes']
+        node_types = torch.Tensor(meta_dict['node_type_list'])
         active_index = (node_types > 0).nonzero().squeeze()
-        self.node_types = torch.Tensor(meta_dict['atom_type_dist'])[active_index]
+        self.node_types = torch.Tensor(meta_dict['node_type_list'])[active_index]
         self.nodes_dist = DistributionNodes(self.n_nodes)
         self.active_index = active_index

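
`DataInfos` now rebuilds its statistics from the JSON cache written by `process()` instead of querying the live API. A minimal round-trip sketch of that cache format (toy record and a throwaway file name):

```python
import json
import numpy as np

op_type = {'nor_conv_1x1': 1, 'nor_conv_3x3': 2, 'avg_pool_3x3': 3,
           'skip_connect': 4, 'output': 5, 'none': 6, 'input': 7}

record = {'adj_matrix': [[0, 1], [0, 0]], 'ops': ['input', 'output']}
with open('toy-graph.json', 'w') as f:
    json.dump([record], f)

# Same decoding as read_adj_ops_from_json: op name strings -> integer codes.
with open('toy-graph.json') as f:
    data = json.load(f)
pairs = [(np.array(item['adj_matrix']), [op_type[op] for op in item['ops']])
         for item in data]
print(pairs[0][1])  # [7, 5]
```
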
@@ -923,11 +1185,11 @@ def compute_meta(root, source_name, train_index, test_index):
         'transition_E': tansition_E.tolist(),
     }

-    with open(f'{root}/{source_name}.meta.json', "w") as f:
+    with open(f'/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench201.meta.json', "w") as f:
         json.dump(meta_dict, f)

     return meta_dict


 if __name__ == "__main__":
-    pass
+    dataset = Dataset(source='nasbench', root='/home/stud/hanzhang/nasbenchDiT/graph-dit', target_prop='Class', transform=None)
@@ -13,11 +13,11 @@ from metrics.abstract_metrics import SumExceptBatchMetric, SumExceptBatchKL, NLL
 import utils

 class Graph_DiT(pl.LightningModule):
-    # def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools):
-    def __init__(self, cfg, dataset_infos, visualization_tools):
+    def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools):
+    # def __init__(self, cfg, dataset_infos, visualization_tools):

         super().__init__()
-        # self.save_hyperparameters(ignore=['train_metrics', 'sampling_metrics'])
+        self.save_hyperparameters(ignore=['train_metrics', 'sampling_metrics'])
         self.test_only = cfg.general.test_only
         self.guidance_target = getattr(cfg.dataset, 'guidance_target', None)

@@ -57,8 +57,8 @@ class Graph_DiT(pl.LightningModule):
         self.test_E_logp = SumExceptBatchMetric()
         self.test_y_collection = []

-        # self.train_metrics = train_metrics
-        # self.sampling_metrics = sampling_metrics
+        self.train_metrics = train_metrics
+        self.sampling_metrics = sampling_metrics

         self.visualization_tools = visualization_tools
         self.max_n_nodes = dataset_infos.max_n_nodes
@@ -179,9 +179,9 @@ class Graph_DiT(pl.LightningModule):
     @torch.no_grad()
     def validation_step(self, data, i):
         data_x = F.one_hot(data.x, num_classes=118).float()[:, self.active_index]
-        data_edge_attr = F.one_hot(data.edge_attr, num_classes=5).float()
+        data_edge_attr = F.one_hot(data.edge_attr, num_classes=10).float()
         dense_data, node_mask = utils.to_dense(data_x, data.edge_index, data_edge_attr, data.batch, self.max_n_nodes)
-        dense_data = dense_data.mask(node_mask)
+        dense_data = dense_data.mask(node_mask, collapse=True)
         noisy_data = self.apply_noise(dense_data.X, dense_data.E, data.y, node_mask)
         pred = self.forward(noisy_data)
         nll = self.compute_val_loss(pred, noisy_data, dense_data.X, dense_data.E, data.y, node_mask, test=False)
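
On the `validation_step` change: `F.one_hot` requires `num_classes` to be strictly greater than the largest label, so widening 5 → 10 avoids a runtime error once edge labels outside 0–4 appear. A toy illustration (values chosen for the example):

```python
import torch
import torch.nn.functional as F

edge_attr = torch.tensor([0, 1, 7])   # stand-in for data.edge_attr

# F.one_hot(edge_attr, num_classes=5) would raise: label 7 >= 5.
e = F.one_hot(edge_attr, num_classes=10).float()
print(e.shape)  # torch.Size([3, 10])
```
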
@@ -78,16 +78,20 @@ def main(cfg: DictConfig):

     datamodule = dataset.DataModule(cfg)
     datamodule.prepare_data()
-    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg)
+    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg, dataset=datamodule.dataset)
     # train_smiles, reference_smiles = datamodule.get_train_smiles()
+    train_graphs, reference_graphs = datamodule.get_train_graphs()

     # get input output dimensions
     dataset_infos.compute_input_output_dims(datamodule=datamodule)
-    # train_metrics = TrainMolecularMetricsDiscrete(dataset_infos)
+    train_metrics = TrainMolecularMetricsDiscrete(dataset_infos)

     # sampling_metrics = SamplingMolecularMetrics(
     #     dataset_infos, train_smiles, reference_smiles
     # )
+    sampling_metrics = SamplingGraphMetrics(
+        dataset_infos, train_graphs, reference_graphs
+    )
     visualization_tools = MolecularVisualization(dataset_infos)

     model_kwargs = {
@@ -135,5 +139,16 @@ def main(cfg: DictConfig):
     else:
         trainer.test(model, datamodule=datamodule, ckpt_path=cfg.general.test_only)

+@hydra.main(
+    version_base="1.1", config_path="../configs", config_name="config"
+)
+def test(cfg: DictConfig):
+    datamodule = dataset.DataModule(cfg)
+    datamodule.prepare_data()
+    dataset_infos = dataset.DataInfos(datamodule=datamodule, cfg=cfg, dataset=datamodule.dataset)
+    train_graphs, reference_graphs = datamodule.get_train_graphs()
+
+    dataset_infos.compute_input_output_dims(datamodule=datamodule)
+
 if __name__ == "__main__":
-    main()
+    test()
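
The new `test` entry point follows the usual Hydra pattern; a stripped-down sketch of that pattern (it assumes a `configs/config.yaml` resolvable relative to the script, as in the diff, and a `dataset.datadir` field in that config):

```python
import hydra
from omegaconf import DictConfig

@hydra.main(version_base="1.1", config_path="../configs", config_name="config")
def test(cfg: DictConfig):
    # Fields such as cfg.dataset.datadir resolve from the YAML config tree.
    print(cfg.dataset.datadir)

if __name__ == "__main__":
    test()
```
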
@@ -23,7 +23,104 @@ def result_to_csv(path, dict_data):
         writer.writeheader()
     writer.writerow(dict_data)

+class SamplingGraphMetrics(nn.Module):
+    def __init__(
+        self,
+        dataset_infos,
+        train_graphs,
+        reference_graphs,
+        n_jobs=1,
+        device="cpu",
+        batch_size=512,
+    ):
+        super().__init__()
+        self.task_name = dataset_infos.task
+        self.dataset_infos = dataset_infos
+        self.active_nodes = dataset_infos.active_nodes
+        self.train_graphs = train_graphs
+
+        self.stat_ref = None
+
+        self.compute_config = {
+            "n_jobs": n_jobs,
+            "device": device,
+            "batch_size": batch_size,
+        }
+
+        self.task_evaluator = {
+            'meta_taskname': dataset_infos.task,
+            'sas': None,
+            'scs': None
+        }
+
+        for cur_task in dataset_infos.task.split("-")[:]:
+            model_path = os.path.join(
+                dataset_infos.base_path, "data/evaluator", f"{cur_task}.joblib"
+            )
+            os.makedirs(os.path.dirname(model_path), exist_ok=True)
+            evaluator = TaskModel(model_path, cur_task)
+            self.task_evaluator[cur_task] = evaluator
+
+    def forward(self, graphs, targets, name, current_epoch, val_counter, test=False):
+        if isinstance(targets, list):
+            targets_cat = torch.cat(targets, dim=0)
+            targets_np = targets_cat.detach().cpu().numpy()
+        else:
+            targets_np = targets.detach().cpu().numpy()
+
+        unique_graphs, all_graphs, all_graphs, targets_log = compute_molecular_metrics(
+            graphs,
+            targets_np,
+            self.train_graphs,
+            self.stat_ref,
+            self.dataset_infos,
+            self.task_evaluator,
+            self.compute_config,
+        )
+
+        if test:
+            file_name = "final_graphs.txt"
+            with open(file_name, "w") as fp:
+                all_tasks_name = list(self.task_evaluator.keys())
+                all_tasks_name = all_tasks_name.copy()
+                if 'meta_taskname' in all_tasks_name:
+                    all_tasks_name.remove('meta_taskname')
+
+                all_tasks_str = "graph, " + ", ".join([f"input_{task}" for task in all_tasks_name] + [f"output_{task}" for task in all_tasks_name])
+                fp.write(all_tasks_str + "\n")
+                for i, graph in enumerate(all_graphs):
+                    if targets_log is not None:
+                        all_result_str = f"{graph}, " + ", ".join([f"{targets_log['input_'+task][i]}" for task in all_tasks_name] + [f"{targets_log['output_'+task][i]}" for task in all_tasks_name])
+                        fp.write(all_result_str + "\n")
+                    else:
+                        fp.write("%s\n" % graph)
+                print("All graphs saved")
+        else:
+            result_path = os.path.join(os.getcwd(), f"graphs/{name}")
+            os.makedirs(result_path, exist_ok=True)
+            text_path = os.path.join(
+                result_path,
+                f"valid_unique_graphs_e{current_epoch}_b{val_counter}.txt",
+            )
+            textfile = open(text_path, "w")
+            for graph in unique_graphs:
+                textfile.write(graph + "\n")
+            textfile.close()

+        all_logs = all_graphs
+        if test:
+            all_logs["log_name"] = "test"
+        else:
+            all_logs["log_name"] = (
+                "epoch" + str(current_epoch) + "_batch" + str(val_counter)
+            )
+
+        result_to_csv("output.csv", all_logs)
+        return all_graphs
+
+    def reset(self):
+        pass
+
 class SamplingMolecularMetrics(nn.Module):
     def __init__(
         self,
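
One detail worth noting in `SamplingGraphMetrics.__init__`: the hyphen-separated task string yields one `TaskModel` per sub-task, each backed by a `.joblib` file under `data/evaluator`. A sketch of just the path bookkeeping (hypothetical base path and task string; no models are loaded):

```python
import os

base_path = '/tmp/graph_dit_demo'          # stand-in for dataset_infos.base_path
task = 'nasbench201-cifar100'              # hypothetical multi-task name

for cur_task in task.split('-'):
    model_path = os.path.join(base_path, 'data/evaluator', f'{cur_task}.joblib')
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print(model_path)
```
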
@@ -40,21 +137,21 @@ class SamplingMolecularMetrics(nn.Module):
         self.active_atoms = dataset_infos.active_atoms
         self.train_smiles = train_smiles

-        if reference_smiles is not None:
-            print(
-                f"--- Computing intermediate statistics for training for #{len(reference_smiles)} smiles ---"
-            )
-            start_time = time.time()
-            self.stat_ref = compute_intermediate_statistics(
-                reference_smiles, n_jobs=n_jobs, device=device, batch_size=batch_size
-            )
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            print(
-                f"--- End computing intermediate statistics: using {elapsed_time:.2f}s ---"
-            )
-        else:
+        # if reference_smiles is not None:
+        #     print(
+        #         f"--- Computing intermediate statistics for training for #{len(reference_smiles)} smiles ---"
+        #     )
+        #     start_time = time.time()
+        #     self.stat_ref = compute_intermediate_statistics(
+        #         reference_smiles, n_jobs=n_jobs, device=device, batch_size=batch_size
+        #     )
+        #     end_time = time.time()
+        #     elapsed_time = end_time - start_time
+        #     print(
+        #         f"--- End computing intermediate statistics: using {elapsed_time:.2f}s ---"
+        #     )
+        # else:
         self.stat_ref = None

         self.comput_config = {
             "n_jobs": n_jobs,
@@ -35,7 +35,13 @@ class CEPerClass(Metric):

     def compute(self):
         return self.total_ce / self.total_samples
+class NodeCE(CEPerClass):
+    def __init__(self, i):
+        super().__init__(i)

+class EdgeCE(CEPerClass):
+    def __init__(self, i):
+        super().__init__(i)

 class AtomCE(CEPerClass):
     def __init__(self, i):
@@ -65,6 +71,21 @@ class AromaticCE(CEPerClass):
     def __init__(self, i):
         super().__init__(i)

+class NodeMetricsCE(MetricCollection):
+    def __init__(self, active_nodes):
+        metrics_list = []
+
+        for i, node_type in enumerate(active_nodes) :
+            metrics_list.append(type(f'{node_type}_CE', (NodeCE,), {})(i))
+        super().__init__(metrics_list)
+
+class EdgeMetricsCE(MetricCollection):
+    def __init__(self):
+        ce_no_bond = NoBondCE(0)
+        ce_SI = SingleCE(1)
+        ce_DO = DoubleCE(2)
+        ce_TR = TripleCE(3)
+        super().__init__([ce_no_bond, ce_SI, ce_DO, ce_TR])

 class AtomMetricsCE(MetricCollection):
     def __init__(self, active_atoms):
@@ -84,7 +105,47 @@ class BondMetricsCE(MetricCollection):
         ce_TR = TripleCE(3)
         super().__init__([ce_no_bond, ce_SI, ce_DO, ce_TR])

 #
+
+class TrainGraphMetricsDiscrete(nn.Module):
+    def __init__(self, dataset_infos):
+        super().__init__()
+        active_nodes = dataset_infos.active_nodes
+        self.train_node_metrics = NodeMetricsCE(active_nodes=active_nodes)
+        self.train_edge_metrics = EdgeMetricsCE()
+
+    def forward(self, masked_pred_X, masked_pred_E, true_X, true_E, log: bool):
+        self.train_node_metrics(masked_pred_X, true_X)
+        self.train_edge_metrics(masked_pred_E, true_E)
+        if log:
+            to_log = {}
+            for key, val in self.train_node_metrics.compute().items():
+                to_log['train/' + key] = val.item()
+            for key, val in self.train_edge_metrics.compute().items():
+                to_log['train/' + key] = val.item()
+
+    def reset(self):
+        for metric in [self.train_node_metrics, self.train_edge_metrics]:
+            metric.reset()
+
+    def log_epoch_metrics(self, current_epoch, log=True):
+        epoch_node_metrics = self.train_node_metrics.compute()
+        epoch_edge_metrics = self.train_edge_metrics.compute()
+
+        to_log = {}
+        for key, val in epoch_node_metrics.items():
+            to_log['train_epoch/' + key] = val.item()
+        for key, val in epoch_edge_metrics.items():
+            to_log['train_epoch/' + key] = val.item()
+
+        for key, val in epoch_node_metrics.items():
+            epoch_node_metrics[key] = round(val.item(),4)
+        for key, val in epoch_edge_metrics.items():
+            epoch_edge_metrics[key] = round(val.item(),4)
+
+        if log:
+            print(f"Epoch {current_epoch}: {epoch_node_metrics} -- {epoch_edge_metrics}")
+
 class TrainMolecularMetricsDiscrete(nn.Module):
     def __init__(self, dataset_infos):
         super().__init__()
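
`NodeMetricsCE` manufactures one metric subclass per active node type at runtime via `type(name, bases, dict)`, so the logged keys carry the op name. The pattern in isolation (plain stand-in base class, no torchmetrics dependency):

```python
class CEBase:
    def __init__(self, class_id):
        self.class_id = class_id

active_nodes = ['input', 'nor_conv_3x3', 'output']
metrics = [type(f'{name}_CE', (CEBase,), {})(i)
           for i, name in enumerate(active_nodes)]
print([type(m).__name__ for m in metrics])
# ['input_CE', 'nor_conv_3x3_CE', 'output_CE']
```
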
@@ -15,6 +15,17 @@ from rdkit.Chem import AllChem
 from rdkit import DataStructs
 from rdkit.Chem import rdMolDescriptors
 rdBase.DisableLog('rdApp.error')
+import json
+
+op_type = {
+    'nor_conv_1x1': 1,
+    'nor_conv_3x3': 2,
+    'avg_pool_3x3': 3,
+    'skip_connect': 4,
+    'output': 5,
+    'none': 6,
+    'input': 7
+}

 task_to_colname = {
     'hiv_b': 'HIV_active',
@@ -32,8 +43,10 @@ tasktype_name = {
     'O2': 'regression',
     'N2': 'regression',
     'CO2': 'regression',
+    'nasbench201': 'regression',
 }
+

 class TaskModel():
     """Scores based on an ECFP classifier."""
     def __init__(self, model_path, task_name):
@@ -55,8 +68,47 @@ class TaskModel():
         perfermance = self.train()
         dump(self.model, model_path)
         print('Oracle peformance: ', perfermance)

     def train(self):
+        def read_adj_ops_from_json(filename):
+            with open(filename, 'r') as json_file:
+                data = json.load(json_file)
+
+            adj_ops_pairs = []
+            for item in data:
+                adj_matrix = np.array(item['adj_matrix'])
+                ops = item['ops']
+                acc = item['train'][0]['accuracy']
+                adj_ops_pairs.append((adj_matrix, ops, acc))
+
+            return adj_ops_pairs
+        def feature_from_adj_and_ops(adj, ops):
+            return np.concatenate([adj.flatten(), ops])
+        filename = '/home/stud/hanzhang/nasbenchDiT/graph_dit/nasbench-201-graph.json'
+        graphs = read_adj_ops_from_json(filename)
+        adjs = []
+        opss = []
+        accs = []
+        features = []
+        for graph in graphs:
+            adj, ops, acc=graph
+            op_code = [op_type[op] for op in ops]
+            adjs.append(adj)
+            opss.append(op_code)
+            accs.append(acc)
+            features.append(feature_from_adj_and_ops(adj, op_code))
+        features = np.array(features)
+        labels = np.array(accs)
+
+        mask = ~np.isnan(labels)
+        labels = labels[mask]
+        features = features[mask]
+        self.model.fit(features, labels)
+        y_pred = self.model.predict(features)
+        perf = self.metric_func(labels, y_pred)
+        print(f'{self.task_name} performance: {perf}')
+        return perf
+
+    def train__(self):
         data_path = os.path.dirname(self.model_path)
         data_path = os.path.join(os.path.dirname(self.model_path), '..', f'raw/{self.task_name}.csv.gz')
         df = pd.read_csv(data_path)
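
The new `TaskModel.train` fits the oracle on a simple flat encoding: the cell's adjacency matrix flattened and concatenated with its integer op codes. A sketch on a toy 3-node graph:

```python
import numpy as np

def feature_from_adj_and_ops(adj, ops):
    return np.concatenate([adj.flatten(), ops])

adj = np.array([[0, 1, 1],
                [0, 0, 1],
                [0, 0, 0]])
op_code = [7, 2, 5]

x = feature_from_adj_and_ops(adj, op_code)
print(x.shape)  # (12,): 9 adjacency entries + 3 op codes
```
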
 graph_dit/workingdoc.md | 0 (new file)