Graph-DiT/graph_dit/test_perf.py

from nas_201_api import NASBench201API as API
import re
import pandas as pd
import json
import numpy as np
import argparse

# Parse the command line first so that `--help` and argument errors are
# reported before the benchmark file is loaded.
parser = argparse.ArgumentParser(
    description='Score generated NAS-Bench-201 graphs by their benchmark test accuracy.')

parser.add_argument('--file_path', type=str, default='211035.txt',
                    help='name of the generated-graph dump inside ./output_graphs/')
# Only these datasets have accuracy buckets defined further down in this
# script, so reject anything else here instead of failing later with a KeyError.
parser.add_argument('--datasets', type=str, default='cifar10',
                    choices=['cifar10', 'cifar100'],
                    help='dataset whose test accuracy is looked up for every valid graph')
args = parser.parse_args()

# Benchmark handle used below to map architecture strings to accuracies.
api = API('./NAS-Bench-201-v1_1-096897.pth')

def process_graph_data(text):
    """Parse a text dump of generated graphs into a DataFrame.

    The dump is split on the ``nodes:`` marker; each resulting record is
    expected to hold a run of ``tensor(<int>)`` tokens, an ``edges:`` marker
    and then more ``tensor(<int>)`` tokens.

    Args:
        text: Full contents of the dump file.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed record and two
        columns: ``nodes`` (list of ints) and ``edges`` (the edge values cut
        into consecutive rows of up to 8 ints). Records without an
        ``edges:`` marker or without any node token are skipped, so both
        columns always have the same length.
    """
    def split_list(input_list, chunk_size):
        # Cut a flat list into consecutive chunks of `chunk_size` items.
        return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]

    nodes_list = []
    edges_list = []

    # Everything before the first 'nodes:' marker is not a record, hence [1:].
    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # No edge part at all: the record is incomplete.
            continue
        nodes_section, edge_section = parts[0], parts[1]

        # Search only the node part so edge tokens can never be mistaken
        # for nodes when the node list is missing.
        nodes_match = re.search(r'(tensor\(\d+\) ?)+', nodes_section)
        if not nodes_match:
            continue

        nodes = re.findall(r'tensor\((\d+)\)', nodes_match.group(0))
        edges = re.findall(r'tensor\((\d+)\)', edge_section)

        # Append both parts together to keep the two columns aligned.
        nodes_list.append([int(x) for x in nodes])
        edges_list.append(split_list([int(x) for x in edges], 8))

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }

    print(data)
    df = pd.DataFrame(data)
    print('df')
    if not df.empty:
        # Preview of the first record; skipped when nothing was parsed.
        print(df['nodes'][0], df['edges'][0])
    return df

def is_valid_nasbench201(adj, ops):
    """Check whether a decoded graph matches the fixed cell template.

    Args:
        adj: Adjacency matrix as a list of rows of ints.
        ops: Operation id per node. The first id must be 0, the last must
            be 6, and every id in between must be one of 1..5.

    Returns:
        True when ``ops`` has one entry per template node, ``adj`` is a
        0/1 matrix of the template's size, and the upper triangle of
        ``adj`` equals the template; False otherwise. Entries on or below
        the diagonal only have to be 0 or 1.
    """
    print(ops)

    # Fixed connectivity every valid graph must reproduce above the diagonal.
    adj_mat = [[0, 1, 1, 0, 1, 0, 0, 0],
               [0, 0, 0, 1, 0, 1, 0, 0],
               [0, 0, 0, 0, 0, 0, 1, 0],
               [0, 0, 0, 0, 0, 0, 1, 0],
               [0, 0, 0, 0, 0, 0, 0, 1],
               [0, 0, 0, 0, 0, 0, 0, 1],
               [0, 0, 0, 0, 0, 0, 0, 1],
               [0, 0, 0, 0, 0, 0, 0, 0]]
    size = len(adj_mat)

    # The length check also protects the ops[0] / ops[-1] lookups from
    # failing on an empty list.
    if len(ops) != size or ops[0] != 0 or ops[-1] != 6:
        return False

    # Every intermediate node, index 1 included, must carry a real operation.
    if any(op not in (1, 2, 3, 4, 5) for op in ops[1:-1]):
        return False

    # A truncated or oversized matrix cannot match the template.
    if len(adj) != size or any(len(row) != size for row in adj):
        return False

    for i, row in enumerate(adj):
        for j, value in enumerate(row):
            if value not in (0, 1):
                return False
            if j > i and value != adj_mat[i][j]:
                return False
    return True

# Operation name for every operation id; the id is the list position.
num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none', 'output']


def nodes_to_arch_str(nodes):
    """Build the architecture string for a list of operation ids.

    Args:
        nodes: Operation ids, each an index into ``num_to_op``. Entries 1
            through 6 are the ones that end up in the string.

    Returns:
        A string of three ``+``-separated groups of the form
        ``|op~0|+|op~0|op~1|+|op~0|op~1|op~2|``.
    """
    op_names = [num_to_op[op_id] for op_id in nodes]

    # Each group lists (position in op_names, number written after '~').
    # NOTE(review): the '~k' suffix presumably names the source node of the
    # edge -- confirm against the benchmark's string format.
    layout = (
        ((1, 0),),
        ((2, 0), (3, 1)),
        ((4, 0), (5, 1), (6, 2)),
    )
    groups = [
        '|' + '|'.join(f'{op_names[pos]}~{src}' for pos, src in group) + '|'
        for group in layout
    ]
    return '+'.join(groups)

filename = args.file_path
datasets_name = args.datasets

# Read the dump and close the file before the scoring loop starts; the
# handle is not needed once the text is in memory.
with open('./output_graphs/' + filename, 'r') as f:
    texts = f.read()
df = process_graph_data(texts)

valid = 0
not_valid = 0
scores = []

# Accuracy bucket boundaries per dataset and the matching histogram.
thresholds = {
    'cifar10': [90, 91, 92, 93, 94],
    'cifar100': [68, 69, 70, 71, 72, 73]
}
bucket_edges = thresholds[datasets_name]
overflow_key = f'>{bucket_edges[-1]}'
dist = {f'<{threshold}': 0 for threshold in bucket_edges}
dist[overflow_key] = 0

for nodes, edges in zip(df['nodes'], df['edges']):
    if not is_valid_nasbench201(edges, nodes):
        not_valid += 1
        continue

    arch_str = nodes_to_arch_str(nodes)
    index = api.query_index_by_arch(arch_str)
    if index < 0:
        # NOTE(review): the benchmark API appears to return -1 for an
        # architecture string it does not know -- confirm. Such graphs are
        # counted as invalid instead of being looked up.
        not_valid += 1
        continue

    valid += 1
    res = api.get_more_info(index, datasets_name, None, hp=200, is_random=False)
    acc = res['test-accuracy']
    scores.append((index, acc))

    # Count the accuracy in the first bucket whose upper bound exceeds it;
    # anything at or above the last bound goes to the overflow bucket.
    for threshold in bucket_edges:
        if acc < threshold:
            dist[f'<{threshold}'] += 1
            break
    else:
        dist[overflow_key] += 1

with open('./output_graphs/' + filename + '_' + datasets_name + '.json', 'w') as out_file:
    json.dump(scores, out_file)

print(scores)
print(valid, not_valid)
print(dist)
if scores:
    accuracies = [acc for _, acc in scores]
    print("mean: ", np.mean(accuracies))
    print("max: ", np.max(accuracies))
    print("min: ", np.min(accuracies))
else:
    # np.max / np.min raise on an empty sequence, so report the situation
    # explicitly instead of crashing after the JSON file was written.
    print("no valid architectures found; skipping accuracy statistics")
add a test performance script. 2024-08-26 20:12:47 +02:00			`from nas_201_api import NASBench201API as API`
			`import re`
			`import pandas as pd`
			`import json`
			`import numpy as np`
			`import argparse`

			`api = API('./NAS-Bench-201-v1_1-096897.pth')`

			`parser = argparse.ArgumentParser(description='Process some integers.')`

			`parser.add_argument('--file_path', type=str, default='211035.txt',)`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00			`parser.add_argument('--datasets', type=str, default='cifar10',)`
add a test performance script. 2024-08-26 20:12:47 +02:00			`args = parser.parse_args()`

			`def process_graph_data(text):`
			`# Split the input text into sections for each graph`
			`graph_sections = text.strip().split('nodes:')`

			`# Prepare lists to store data`
			`nodes_list = []`
			`edges_list = []`
			`results_list = []`

			`for section in graph_sections[1:]:`
			`# Extract nodes`
			`nodes_section = section.split('edges:')[0]`
			`nodes_match = re.search(r'(tensor\(\d+\) ?)+', section)`
			`if nodes_match:`
			`nodes = re.findall(r'tensor\((\d+)\)', nodes_match.group(0))`
			`nodes_list.append(nodes)`

			`# Extract edges`
			`edge_section = section.split('edges:')[1]`
			`edges_match = re.search(r'edges:', section)`
			`if edges_match:`
			`edges = re.findall(r'tensor\((\d+)\)', edge_section)`
			`edges_list.append(edges)`

			`# Extract the last floating point number as a result`

			`# Create a DataFrame to store the extracted data`
			`data = {`
			`'nodes': nodes_list,`
			`'edges': edges_list,`
			`}`
			`data['nodes'] = [[int(x) for x in node] for node in data['nodes']]`
			`data['edges'] = [[int(x) for x in edge] for edge in data['edges']]`
			`def split_list(input_list, chunk_size):`
			`return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]`
			`data['edges'] = [split_list(edge, 8) for edge in data['edges']]`

			`print(data)`
			`df = pd.DataFrame(data)`
			`print('df')`
			`print(df['nodes'][0], df['edges'][0])`
			`return df`

			`def is_valid_nasbench201(adj, ops):`
			`print(ops)`
			`if ops[0] != 0 or ops[-1] != 6:`
			`return False`
			`for i in range(2, len(ops) - 1):`
			`if ops[i] not in [1, 2, 3, 4, 5]:`
			`return False`
			`adj_mat = [ [0, 1, 1, 0, 1, 0, 0, 0],`
			`[0, 0, 0, 1, 0, 1 ,0 ,0],`
			`[0, 0, 0, 0, 0, 0, 1, 0],`
			`[0, 0, 0, 0, 0, 0, 1, 0],`
			`[0, 0, 0, 0, 0, 0, 0, 1],`
			`[0, 0, 0, 0, 0, 0, 0, 1],`
			`[0, 0, 0, 0, 0, 0, 0, 1],`
			`[0, 0, 0, 0, 0, 0, 0, 0]]`

			`for i in range(len(adj)):`
			`for j in range(len(adj[i])):`
			`if adj[i][j] not in [0, 1]:`
			`return False`
			`if j > i:`
			`if adj[i][j] != adj_mat[i][j]:`
			`return False`
			`return True`

			`num_to_op = ['input', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none', 'output']`
			`def nodes_to_arch_str(nodes):`
			`nodes_str = [num_to_op[node] for node in nodes]`
			`arch_str = '\|' + nodes_str[1] + '~0\|+' + \`
			`'\|' + nodes_str[2] + '~0\|' + nodes_str[3] + '~1\|+' +\`
			`'\|' + nodes_str[4] + '~0\|' + nodes_str[5] + '~1\|' + nodes_str[6] + '~2\|'`
			`return arch_str`

			`filename = args.file_path`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00			`datasets_name = args.datasets`
add a test performance script. 2024-08-26 20:12:47 +02:00
			`with open('./output_graphs/' + filename, 'r') as f:`
			`texts = f.read()`
			`df = process_graph_data(texts)`
			`valid = 0`
			`not_valid = 0`
			`scores = []`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00
			`# 定义分类标准和分布字典的映射`
			`thresholds = {`
			`'cifar10': [90, 91, 92, 93, 94],`
			`'cifar100': [68,69,70, 71, 72, 73]`
			`}`
			`dist = {f'<{threshold}': 0 for threshold in thresholds[datasets_name]}`
			`dist[f'>{thresholds[datasets_name][-1]}'] = 0`

add a test performance script. 2024-08-26 20:12:47 +02:00			`for i in range(len(df)):`
			`nodes = df['nodes'][i]`
			`edges = df['edges'][i]`
			`result = is_valid_nasbench201(edges, nodes)`
			`if result:`
			`valid += 1`
			`arch_str = nodes_to_arch_str(nodes)`
			`index = api.query_index_by_arch(arch_str)`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00			`res = api.get_more_info(index, datasets_name, None, hp=200, is_random=False)`
add a test performance script. 2024-08-26 20:12:47 +02:00			`acc = res['test-accuracy']`
			`scores.append((index, acc))`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00
			`# 根据阈值更新分布`
			`updated = False`
			`for threshold in thresholds[datasets_name]:`
			`if acc < threshold:`
			`dist[f'<{threshold}'] += 1`
			`updated = True`
			`break`
			`if not updated:`
			`dist[f'>{thresholds[datasets_name][-1]}'] += 1`
add a test performance script. 2024-08-26 20:12:47 +02:00			`else:`
			`not_valid += 1`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00
			`with open('./output_graphs/' + filename + '_' + datasets_name +'.json', 'w') as f:`
add a test performance script. 2024-08-26 20:12:47 +02:00			`json.dump(scores, f)`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00
add a test performance script. 2024-08-26 20:12:47 +02:00			`print(scores)`
			`print(valid, not_valid)`
			`print(dist)`
			`print("mean: ", np.mean([x[1] for x in scores]))`
			`print("max: ", np.max([x[1] for x in scores]))`
			`print("min: ", np.min([x[1] for x in scores]))`
adjust threshhold for cifar100 2024-08-29 10:37:42 +02:00
add a test performance script. 2024-08-26 20:12:47 +02:00