132 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			132 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#####################################################
 | 
						|
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.02 #
 | 
						|
#####################################################
 | 
						|
# python exps/trading/baselines.py --alg GRU
 | 
						|
# python exps/trading/baselines.py --alg LSTM
 | 
						|
# python exps/trading/baselines.py --alg ALSTM
 | 
						|
# python exps/trading/baselines.py --alg XGBoost
 | 
						|
# python exps/trading/baselines.py --alg LightGBM
 | 
						|
#####################################################
 | 
						|
import sys, argparse
 | 
						|
from collections import OrderedDict
 | 
						|
from pathlib import Path
 | 
						|
from pprint import pprint
 | 
						|
import ruamel.yaml as yaml
 | 
						|
 | 
						|
lib_dir = (Path(__file__).parent / ".." / ".." / "lib").resolve()
 | 
						|
if str(lib_dir) not in sys.path:
 | 
						|
    sys.path.insert(0, str(lib_dir))
 | 
						|
 | 
						|
import qlib
 | 
						|
from qlib.utils import init_instance_by_config
 | 
						|
from qlib.workflow import R
 | 
						|
from qlib.utils import flatten_dict
 | 
						|
from qlib.log import set_log_basic_config
 | 
						|
 | 
						|
 | 
						|
def retrieve_configs():
 | 
						|
    # https://github.com/microsoft/qlib/blob/main/examples/benchmarks/
 | 
						|
    config_dir = (lib_dir / ".." / "configs" / "qlib").resolve()
 | 
						|
    # algorithm to file names
 | 
						|
    alg2names = OrderedDict()
 | 
						|
    alg2names["GRU"] = "workflow_config_gru_Alpha360.yaml"
 | 
						|
    alg2names["LSTM"] = "workflow_config_lstm_Alpha360.yaml"
 | 
						|
    # A dual-stage attention-based recurrent neural network for time series prediction, IJCAI-2017
 | 
						|
    alg2names["ALSTM"] = "workflow_config_alstm_Alpha360.yaml"
 | 
						|
    # XGBoost: A Scalable Tree Boosting System, KDD-2016
 | 
						|
    alg2names["XGBoost"] = "workflow_config_xgboost_Alpha360.yaml"
 | 
						|
    # LightGBM: A Highly Efficient Gradient Boosting Decision Tree, NeurIPS-2017
 | 
						|
    alg2names["LightGBM"] = "workflow_config_lightgbm_Alpha360.yaml"
 | 
						|
 | 
						|
    # find the yaml paths
 | 
						|
    alg2paths = OrderedDict()
 | 
						|
    for idx, (alg, name) in enumerate(alg2names.items()):
 | 
						|
        path = config_dir / name
 | 
						|
        assert path.exists(), "{:} does not exist.".format(path)
 | 
						|
        alg2paths[alg] = str(path)
 | 
						|
        print("The {:02d}/{:02d}-th baseline algorithm is {:9s} ({:}).".format(idx, len(alg2names), alg, path))
 | 
						|
    return alg2paths
 | 
						|
 | 
						|
 | 
						|
def update_gpu(config, gpu):
 | 
						|
    config = config.copy()
 | 
						|
    if "GPU" in config["task"]["model"]:
 | 
						|
        config["task"]["model"]["GPU"] = gpu
 | 
						|
    return config
 | 
						|
 | 
						|
 | 
						|
def update_market(config, market):
 | 
						|
    config = config.copy()
 | 
						|
    config["market"] = market
 | 
						|
    config["data_handler_config"]["instruments"] = market
 | 
						|
    return config
 | 
						|
 | 
						|
 | 
						|
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
 | 
						|
 | 
						|
    # model initiaiton
 | 
						|
    print('')
 | 
						|
    print('[{:}] - [{:}]: {:}'.format(experiment_name, recorder_name, uri))
 | 
						|
    print('dataset={:}'.format(dataset))
 | 
						|
 | 
						|
    model = init_instance_by_config(task_config["model"])
 | 
						|
 | 
						|
    # start exp
 | 
						|
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name, uri=uri):
 | 
						|
 | 
						|
        log_file = R.get_recorder().root_uri / '{:}.log'.format(experiment_name)
 | 
						|
        set_log_basic_config(log_file)
 | 
						|
 | 
						|
        # train model
 | 
						|
        R.log_params(**flatten_dict(task_config))
 | 
						|
        model.fit(dataset)
 | 
						|
        recorder = R.get_recorder()
 | 
						|
        R.save_objects(**{"model.pkl": model})
 | 
						|
 | 
						|
        # generate records: prediction, backtest, and analysis
 | 
						|
        for record in task_config["record"]:
 | 
						|
            record = record.copy()
 | 
						|
            if record["class"] == "SignalRecord":
 | 
						|
                srconf = {"model": model, "dataset": dataset, "recorder": recorder}
 | 
						|
                record["kwargs"].update(srconf)
 | 
						|
                sr = init_instance_by_config(record)
 | 
						|
                sr.generate()
 | 
						|
            else:
 | 
						|
                rconf = {"recorder": recorder}
 | 
						|
                record["kwargs"].update(rconf)
 | 
						|
                ar = init_instance_by_config(record)
 | 
						|
                ar.generate()
 | 
						|
 | 
						|
 | 
						|
def main(xargs, exp_yaml):
 | 
						|
    assert Path(exp_yaml).exists(), "{:} does not exist.".format(exp_yaml)
 | 
						|
 | 
						|
    with open(exp_yaml) as fp:
 | 
						|
        config = yaml.safe_load(fp)
 | 
						|
    config = update_gpu(config, xargs.gpu)
 | 
						|
    # config = update_market(config, 'csi300')
 | 
						|
 | 
						|
    qlib.init(**config.get("qlib_init"))
 | 
						|
    dataset_config = config.get("task").get("dataset")
 | 
						|
    dataset = init_instance_by_config(dataset_config)
 | 
						|
    pprint('args: {:}'.format(xargs))
 | 
						|
    pprint(dataset_config)
 | 
						|
    pprint(dataset)
 | 
						|
 | 
						|
    for irun in range(xargs.times):
 | 
						|
        run_exp(config.get("task"), dataset, xargs.alg, "recorder-{:02d}-{:02d}".format(irun, xargs.times), xargs.save_dir)
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
 | 
						|
    alg2paths = retrieve_configs()
 | 
						|
 | 
						|
    parser = argparse.ArgumentParser("Baselines")
 | 
						|
    parser.add_argument("--save_dir", type=str, default="./outputs/qlib-baselines", help="The checkpoint directory.")
 | 
						|
    parser.add_argument("--times", type=int, default=10, help="The repeated run times.")
 | 
						|
    parser.add_argument("--gpu", type=int, default=0, help="The GPU ID used for train / test.")
 | 
						|
    parser.add_argument("--alg", type=str, choices=list(alg2paths.keys()), required=True, help="The algorithm name.")
 | 
						|
    args = parser.parse_args()
 | 
						|
 | 
						|
    main(args, alg2paths[args.alg])
 |