Update baselines

2021-03-05 13:11:26 +00:00
parent 2e0325ca63
commit 2fa358fdf6
5 changed files with 286 additions and 5 deletions
--- a/.latent-data/qlib
+++ b/.latent-data/qlib
--- a/configs/qlib/workflow_config_alstm_Alpha360.yaml
+++ b/configs/qlib/workflow_config_alstm_Alpha360.yaml
@@ -0,0 +1,83 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market all
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: loss
+            loss: mse
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/configs/qlib/workflow_config_lightgbm_Alpha360.yaml
+++ b/configs/qlib/workflow_config_lightgbm_Alpha360.yaml
@@ -0,0 +1,73 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market all
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/exps/trading/baselines.py
+++ b/exps/trading/baselines.py
@@ -0,0 +1,127 @@
+#####################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.02 #
+#####################################################
+# python exps/trading/baselines.py --alg GRU
+# python exps/trading/baselines.py --alg LSTM
+# python exps/trading/baselines.py --alg ALSTM
+# python exps/trading/baselines.py --alg XGBoost
+# python exps/trading/baselines.py --alg LightGBM
+#####################################################
+import sys, argparse
+from collections import OrderedDict
+from pathlib import Path
+from pprint import pprint
+import ruamel.yaml as yaml
+
+lib_dir = (Path(__file__).parent / ".." / ".." / "lib").resolve()
+if str(lib_dir) not in sys.path:
+    sys.path.insert(0, str(lib_dir))
+
+import qlib
+from qlib.utils import init_instance_by_config
+from qlib.workflow import R
+from qlib.utils import flatten_dict
+from qlib.log import set_log_basic_config
+
+
+def retrieve_configs():
+    # https://github.com/microsoft/qlib/blob/main/examples/benchmarks/
+    config_dir = (lib_dir / ".." / "configs" / "qlib").resolve()
+    # algorithm to file names
+    alg2names = OrderedDict()
+    alg2names["GRU"] = "workflow_config_gru_Alpha360.yaml"
+    alg2names["LSTM"] = "workflow_config_lstm_Alpha360.yaml"
+    # A dual-stage attention-based recurrent neural network for time series prediction, IJCAI-2017
+    alg2names["ALSTM"] = "workflow_config_alstm_Alpha360.yaml"
+    # XGBoost: A Scalable Tree Boosting System, KDD-2016
+    alg2names["XGBoost"] = "workflow_config_xgboost_Alpha360.yaml"
+    # LightGBM: A Highly Efficient Gradient Boosting Decision Tree, NeurIPS-2017
+    alg2names["LightGBM"] = "workflow_config_lightgbm_Alpha360.yaml"
+
+    # find the yaml paths
+    alg2paths = OrderedDict()
+    for idx, (alg, name) in enumerate(alg2names.items()):
+        path = config_dir / name
+        assert path.exists(), "{:} does not exist.".format(path)
+        alg2paths[alg] = str(path)
+        print("The {:02d}/{:02d}-th baseline algorithm is {:9s} ({:}).".format(idx, len(alg2names), alg, path))
+    return alg2paths
+
+
+def update_gpu(config, gpu):
+    config = config.copy()
+    if "GPU" in config["task"]["model"]:
+        config["task"]["model"]["GPU"] = gpu
+    return config
+
+
+def update_market(config, market):
+    config = config.copy()
+    config["market"] = market
+    config["data_handler_config"]["instruments"] = market
+    return config
+
+
+def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
+
+    # model initiaiton
+    model = init_instance_by_config(task_config["model"])
+
+    # start exp
+    with R.start(experiment_name=experiment_name, recorder_name=recorder_name, uri=uri):
+
+        log_file = R.get_recorder().root_uri / '{:}.log'.format(experiment_name)
+        set_log_basic_config(log_file)
+
+        # train model
+        R.log_params(**flatten_dict(task_config))
+        model.fit(dataset)
+        recorder = R.get_recorder()
+        R.save_objects(**{"model.pkl": model})
+
+        # generate records: prediction, backtest, and analysis
+        for record in task_config["record"]:
+            record = record.copy()
+            if record["class"] == "SignalRecord":
+                srconf = {"model": model, "dataset": dataset, "recorder": recorder}
+                record["kwargs"].update(srconf)
+                sr = init_instance_by_config(record)
+                sr.generate()
+            else:
+                rconf = {"recorder": recorder}
+                record["kwargs"].update(rconf)
+                ar = init_instance_by_config(record)
+                ar.generate()
+
+
+def main(xargs, exp_yaml):
+    assert Path(exp_yaml).exists(), "{:} does not exist.".format(exp_yaml)
+
+    with open(exp_yaml) as fp:
+        config = yaml.safe_load(fp)
+    config = update_gpu(config, xargs.gpu)
+    # config = update_market(config, 'csi300')
+
+    qlib.init(**config.get("qlib_init"))
+    dataset_config = config.get("task").get("dataset")
+    dataset = init_instance_by_config(dataset_config)
+    pprint('args: {:}'.format(xargs))
+    pprint(dataset_config)
+    pprint(dataset)
+
+    for irun in range(xargs.times):
+        run_exp(config.get("task"), dataset, xargs.alg, "recorder-{:02d}-{:02d}".format(irun, xargs.times), xargs.save_dir)
+
+
+if __name__ == "__main__":
+
+    alg2paths = retrieve_configs()
+
+    parser = argparse.ArgumentParser("Baselines")
+    parser.add_argument("--save_dir", type=str, default="./outputs/qlib-baselines", help="The checkpoint directory.")
+    parser.add_argument("--times", type=int, default=10, help="The repeated run times.")
+    parser.add_argument("--gpu", type=int, default=0, help="The GPU ID used for train / test.")
+    parser.add_argument("--alg", type=str, choices=list(alg2paths.keys()), required=True, help="The algorithm name.")
+    args = parser.parse_args()
+
+    main(args, alg2paths[args.alg])
--- a/exps/trading/workflow_tt.py
+++ b/exps/trading/workflow_tt.py
@@ -104,7 +104,7 @@ def main(xargs):


    # start exp to train model
-    with R.start(experiment_name="train_tt_model"):
+    with R.start(experiment_name="tt_model", uri=xargs.save_dir):
        set_log_basic_config(R.get_recorder().root_uri / 'log.log')

        model = init_instance_by_config(model_config)
@@ -139,8 +139,6 @@ if __name__ == "__main__":
    args = parser.parse_args()

    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    exp_manager = C.exp_manager
-    exp_manager["kwargs"]["uri"] = "file:{:}".format(Path(args.save_dir).resolve())
-    qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager)
+    qlib.init(provider_uri=provider_uri, region=REG_CN)

    main(args)