In [1]:
#
# Exhaustive Search Results
#
import os
import re
import sys
import qlib
import pprint
import numpy as np
import pandas as pd

from pathlib import Path

# A notebook has no real ``__file__``. The *string literal* "__file__" is resolved
# against the current working directory, so after ``dirname`` this name holds the
# (real) current working directory rather than a file path.
__file__ = os.path.dirname(os.path.realpath("__file__"))
# ``.parent`` followed by ".." climbs two levels above that directory; the result
# is treated as the project root.
root_dir = (Path(__file__).parent / "..").resolve()
lib_dir = (root_dir / "lib").resolve()
print("The root path: {:}".format(root_dir))
print("The library path: {:}".format(lib_dir))
# Fail early if the notebook was started from an unexpected directory.
assert lib_dir.exists(), "{:} does not exist".format(lib_dir)
# Put the project's ``lib`` directory first on ``sys.path``; the membership check
# keeps re-runs of this cell from inserting it again.
if str(lib_dir) not in sys.path:
    sys.path.insert(0, str(lib_dir))

import qlib
from qlib import config as qconfig
from qlib.workflow import R
# Initialise qlib with the data stored under ~/.qlib/qlib_data/cn_data and the
# REG_CN region setting.
qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=qconfig.REG_CN)

The root path: /Users/xuanyidong/Desktop/AutoDL-Projects
The library path: /Users/xuanyidong/Desktop/AutoDL-Projects/lib


[70363:MainThread](2021-04-12 13:25:01,065) INFO - qlib.Initialization - [config.py:276] - default_conf: client.
[70363:MainThread](2021-04-12 13:25:01,085) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.
[70363:MainThread](2021-04-12 13:25:01,092) INFO - qlib.Initialization - [__init__.py:47] - data_path=/Users/xuanyidong/.qlib/qlib_data/cn_data


In [2]:
from utils.qlib_utils import QResult

In [3]:
def filter_finished(recorders):
    """Split off the recorders whose ``status`` is ``"FINISHED"``.

    Args:
        recorders: mapping from a key to a recorder object exposing a
            ``status`` attribute.

    Returns:
        A ``(finished, not_finished)`` tuple: ``finished`` is a new dict with
        the finished recorders (original order kept) and ``not_finished`` is
        the number of entries that were left out.
    """
    finished = {
        key: recorder
        for key, recorder in recorders.items()
        if recorder.status == "FINISHED"
    }
    # Whatever did not make it into ``finished`` is counted as not finished.
    return finished, len(recorders) - len(finished)

def query_info(save_dir, verbose, name_filter, key_map):
    """Collect one ``QResult`` per experiment stored under ``save_dir``.

    Args:
        save_dir: a single location (anything whose ``str()`` is accepted by
            ``R.set_uri``), or a list/tuple of such locations (may be nested).
            Sequences are queried element by element and the results are
            concatenated.
        verbose: when truthy, print progress information for each experiment.
        name_filter: optional regular expression; an experiment is skipped
            unless its name fully matches it (``re.fullmatch``). ``None``
            disables the filtering.
        key_map: forwarded unchanged to ``QResult.update`` together with the
            metrics of each finished recorder.

    Returns:
        A flat list of ``QResult`` objects, one per kept experiment whose
        ``QResult`` is non-empty after all finished recorders were added.

    Note:
        ``R.set_uri`` is called for every queried location, so the URI of the
        shared ``R`` object is left pointing at the last one.
    """
    if isinstance(save_dir, (list, tuple)):
        # Several locations: query each in turn and flatten the results.
        # Tuples are accepted as well; previously a tuple fell through and was
        # stringified into a meaningless URI.
        results = []
        for sub_dir in save_dir:
            results.extend(query_info(sub_dir, verbose, name_filter, key_map))
        return results
    # Here, the save_dir must be a single location (string or path-like).
    R.set_uri(str(save_dir))
    experiments = R.list_experiments()

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))
    qresults = []
    for idx, experiment in enumerate(experiments.values()):
        # NOTE(review): id "0" is presumably the tracking backend's default
        # experiment -- confirm; it is always skipped.
        if experiment.id == "0":
            continue
        if name_filter is not None and re.fullmatch(name_filter, experiment.name) is None:
            continue
        recorders, not_finished = filter_finished(experiment.list_recorders())
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders.".format(
                    idx + 1,
                    len(experiments),
                    experiment.name,
                    len(recorders),
                    len(recorders) + not_finished,
                )
            )
        result = QResult(experiment.name)
        for recorder in recorders.values():
            result.update(recorder.list_metrics(), key_map)
            result.append_path(
                os.path.join(recorder.uri, recorder.experiment_id, recorder.id)
            )
        if not len(result):
            # Reported regardless of ``verbose``.
            print("There are no valid recorders for {:}".format(experiment))
            continue
        if verbose:
            print(
                "There are {:} valid recorders for {:}".format(
                    len(recorders), experiment.name
                )
            )
        qresults.append(result)
    return qresults

In [4]:
# Location of the qlib baseline runs that are going to be analysed.
paths = [(root_dir / 'outputs' / 'qlib-baselines-csi300').resolve()]
print(paths)

# Translate the recorded metric names into the shorter names used for plotting.
key_map = {}
for xset in ("train", "valid", "test"):
    key_map.update(
        {
            "{:}-mean-IC".format(xset): "IC ({:})".format(xset),
            "{:}-mean-ICIR".format(xset): "ICIR ({:})".format(xset),
        }
    )

# Only experiments whose name fully matches 'TSF-.*' are collected.
qresults = query_info(paths, verbose=False, name_filter='TSF-.*', key_map=key_map)

[70363:MainThread](2021-04-12 13:25:01,647) INFO - qlib.workflow - [expm.py:290] - <mlflow.tracking.client.MlflowClient object at 0x7fa920e56820>


[PosixPath('/Users/xuanyidong/Desktop/AutoDL-Projects/outputs/qlib-baselines-csi300')]


In [5]:
import matplotlib
from matplotlib import cm
# Select the "agg" backend; this happens before `matplotlib.pyplot` is imported.
matplotlib.use("agg")
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [8]:
def vis_dropouts(qresults, basenames, name2suffix, save_path):
    """Plot the training and validation IC of every architecture as a PDF.

    The architectures are placed on the x-axis, sorted by the
    ``'ICIR (train)'`` value of their ``'-drop0_0'`` run. One curve is drawn
    per entry of ``name2suffix``, in a left (training) and a right
    (validation) panel.

    Args:
        qresults: sized collection of result objects exposing ``.name`` and
            item access by metric key (``'ICIR (train)'``, ``'IC (train)'``,
            ``'IC (valid)'``).
        basenames: architecture base names. For every base name, results
            named ``basename + '-drop0_0'`` and ``basename + suffix`` (for
            each suffix in ``name2suffix``) must exist.
        name2suffix: mapping from a legend label to the experiment-name
            suffix of the corresponding runs.
        save_path: ``pathlib.Path`` of the PDF to write; its parent directory
            is created when missing.

    Raises:
        KeyError: if a required result name is not present in ``qresults``.
    """
    save_dir = (save_path / '..').resolve()
    save_dir.mkdir(parents=True, exist_ok=True)
    print('There are {:} qlib-results'.format(len(qresults)))

    name2qresult = {qresult.name: qresult for qresult in qresults}
    # Sort architectures by the training ICIR of their no-dropout run. A dict
    # lookup replaces the former quadratic ``basenames.index`` search.
    train_icir = {
        basename: name2qresult[basename + '-drop0_0']['ICIR (train)']
        for basename in basenames
    }
    sorted_basenames = sorted(basenames, key=train_icir.__getitem__)

    dpi, width, height = 200, 4000, 2000
    figsize = width / float(dpi), height / float(dpi)
    LabelSize, LegendFontsize = 22, 22
    font_gap = 5
    colors = ['k', 'r']
    markers = ['*', 'o']

    fig = plt.figure(figsize=figsize)
    x_values = list(range(len(sorted_basenames)))

    def plot_ax(cur_ax, use_train):
        """Draw one panel: training IC if ``use_train`` else validation IC."""
        metric = 'IC (train)' if use_train else 'IC (valid)'
        for idx, (legend, suffix) in enumerate(name2suffix.items()):
            y_values = [
                name2qresult['{:}{:}'.format(name, suffix)][metric]
                for name in sorted_basenames
            ]
            # Cycle through the styles so that more than two settings do not
            # run off the end of the colour / marker lists.
            color = colors[idx % len(colors)]
            marker = markers[idx % len(markers)]
            cur_ax.plot(x_values, y_values, c=color)
            cur_ax.scatter(x_values, y_values,
                           marker=marker, s=3, c=color, alpha=0.9,
                           label=legend)
        # NOTE(review): fixed ticks at 4, 6, 8, 10 -- presumably the stored IC
        # values are already expressed in percent; confirm.
        cur_ax.set_yticks(np.arange(4, 11, 2))
        cur_ax.set_xlabel("sorted architectures", fontsize=LabelSize)
        cur_ax.set_ylabel("{:} IC (%)".format('training' if use_train else 'validation'), fontsize=LabelSize)
        # ``Tick.label`` is no longer available in recent Matplotlib releases;
        # ``tick_params`` sets the same tick-label size on both axes.
        cur_ax.tick_params(axis="both", which="major", labelsize=LabelSize - font_gap)
        cur_ax.legend(loc=4, fontsize=LegendFontsize)

    plot_ax(fig.add_subplot(1, 2, 1), True)
    plot_ax(fig.add_subplot(1, 2, 2), False)
    fig.savefig(save_path, dpi=dpi, bbox_inches="tight", format="pdf")
    plt.close("all")

In [9]:
# Visualization: find the architectures for which every dropout variant exists.
names = [qresult.name for qresult in qresults]
base_names = set()
for name in names:
    # Everything in front of '-drop' is the architecture's base name.
    base_names.add(name.split('-drop')[0])
print(base_names)

# Keep the base names that have both the '-drop0_0' and the '-drop0.1_0' run.
filtered_base_names = set()
for base_name in base_names:
    has_no_dropout = (base_name + '-drop0_0') in names
    if has_no_dropout and (base_name + '-drop0.1_0') in names:
        filtered_base_names.add(base_name)
        continue
    print('Cannot find all names for {:}'.format(base_name))

# The figure is written to the current user's Desktop folder.
home_dir = Path.home()
desktop_dir = home_dir / 'Desktop'
print('The Desktop is at: {:}'.format(desktop_dir))

vis_dropouts(
    qresults,
    list(filtered_base_names),
    {'No-dropout': '-drop0_0', 'Ratio=0.1': '-drop0.1_0'},
    desktop_dir / 'es_csi300_drop.pdf',
)

{'TSF-3x48', 'TSF-2x64', 'TSF-2x12', 'TSF-8x48', 'TSF-6x32', 'TSF-4x48', 'TSF-8x6', 'TSF-4x6', 'TSF-2x32', 'TSF-5x12', 'TSF-5x64', 'TSF-1x64', 'TSF-2x24', 'TSF-8x24', 'TSF-4x12', 'TSF-6x12', 'TSF-1x32', 'TSF-5x32', 'TSF-3x24', 'TSF-8x12', 'TSF-5x48', 'TSF-6x64', 'TSF-7x64', 'TSF-7x48', 'TSF-1x6', 'TSF-2x48', 'TSF-7x24', 'TSF-3x32', 'TSF-1x24', 'TSF-4x64', 'TSF-3x12', 'TSF-8x64', 'TSF-4x32', 'TSF-5x6', 'TSF-7x6', 'TSF-7x12', 'TSF-3x6', 'TSF-4x24', 'TSF-6x48', 'TSF-6x6', 'TSF-1x48', 'TSF-1x12', 'TSF-7x32', 'TSF-5x24', 'TSF-2x6', 'TSF-6x24', 'TSF-3x64', 'TSF-8x32'}
The Desktop is at: /Users/xuanyidong/Desktop
There are 104 qlib-results
