diff --git a/.latent-data/qlib b/.latent-data/qlib index 88b0871..aa552fd 160000 --- a/.latent-data/qlib +++ b/.latent-data/qlib @@ -1 +1 @@ -Subproject commit 88b0871c12d0b139da489c53e02444606f6ca634 +Subproject commit aa552fdb2089cf5b4396a6b75191d2c13211b42d diff --git a/configs/qlib/workflow_config_naive_Alpha360.yaml b/configs/qlib/workflow_config_naive_v1_Alpha360.yaml similarity index 96% rename from configs/qlib/workflow_config_naive_Alpha360.yaml rename to configs/qlib/workflow_config_naive_v1_Alpha360.yaml index 13075fa..47414d6 100644 --- a/configs/qlib/workflow_config_naive_Alpha360.yaml +++ b/configs/qlib/workflow_config_naive_v1_Alpha360.yaml @@ -30,8 +30,8 @@ port_analysis_config: &port_analysis_config min_cost: 5 task: model: - class: NAIVE - module_path: trade_models.naive_model + class: NAIVE_V1 + module_path: trade_models.naive_v1_model kwargs: d_feat: 6 dataset: diff --git a/configs/qlib/workflow_config_naive_v2_Alpha360.yaml b/configs/qlib/workflow_config_naive_v2_Alpha360.yaml new file mode 100644 index 0000000..db2bf40 --- /dev/null +++ b/configs/qlib/workflow_config_naive_v2_Alpha360.yaml @@ -0,0 +1,64 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market all +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: [] + learn_processors: [] + label: ["Ref($close, -2) / Ref($close, -1) - 1"] +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: NAIVE_V2 + module_path: trade_models.naive_v2_model + kwargs: + d_feat: 6 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha360 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SignalMseRecord + module_path: qlib.contrib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/exps/trading/baselines.py b/exps/trading/baselines.py index b03c096..f3501c3 100644 --- a/exps/trading/baselines.py +++ b/exps/trading/baselines.py @@ -5,7 +5,8 @@ # python exps/trading/baselines.py --alg GRU # # python exps/trading/baselines.py --alg LSTM # # python exps/trading/baselines.py --alg ALSTM # -# python exps/trading/baselines.py --alg NAIVE # +# python exps/trading/baselines.py --alg NAIVE-V1 # +# python exps/trading/baselines.py --alg NAIVE-V2 # # # # python exps/trading/baselines.py --alg SFM # # python exps/trading/baselines.py --alg XGBoost # @@ -53,7 +54,8 @@ def retrieve_configs(): # DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis, https://arxiv.org/pdf/2010.01265.pdf alg2names["DoubleE"] = "workflow_config_doubleensemble_Alpha360.yaml" alg2names["TabNet"] = "workflow_config_TabNet_Alpha360.yaml" - alg2names["NAIVE"] = "workflow_config_naive_Alpha360.yaml" + alg2names["NAIVE-V1"] = "workflow_config_naive_v1_Alpha360.yaml" + alg2names["NAIVE-V2"] = "workflow_config_naive_v2_Alpha360.yaml" # find the yaml paths alg2paths = OrderedDict() diff --git a/lib/trade_models/naive_v1_model.py b/lib/trade_models/naive_v1_model.py new file mode 100755 index 0000000..3418003 --- /dev/null +++ b/lib/trade_models/naive_v1_model.py @@ -0,0 +1,88 @@ +################################################## +# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021 # +################################################## +from __future__ import division +from __future__ import print_function + +import random +import numpy as np +import pandas as pd + +from qlib.log import get_module_logger + +from qlib.model.base import Model +from qlib.data.dataset import DatasetH +from qlib.data.dataset.handler import DataHandlerLP + + +class NAIVE_V1(Model): + """NAIVE Version 1 Quant Model""" + + def __init__(self, d_feat=6, seed=None, **kwargs): + # Set logger. + self.logger = get_module_logger("NAIVE") + self.logger.info("NAIVE 1st version: random noise ...") + + # set hyper-parameters. + self.d_feat = d_feat + self.seed = seed + + self.logger.info("NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) + + if self.seed is not None: + random.seed(self.seed) + np.random.seed(self.seed) + self._mean = None + self._std = None + self.fitted = False + + def process_data(self, features): + features = features.reshape(len(features), self.d_feat, -1) + features = features.transpose((0, 2, 1)) + return features[:, :59, 0] + + def mse(self, preds, labels): + masks = ~np.isnan(labels) + masked_preds = preds[masks] + masked_labels = labels[masks] + return np.square(masked_preds - masked_labels).mean() + + def model(self, x): + num = len(x) + return np.random.normal(loc=self._mean, scale=self._std, size=num).astype(x.dtype) + + def fit(self, dataset: DatasetH): + def _prepare_dataset(df_data): + features = df_data["feature"].values + features = self.process_data(features) + labels = df_data["label"].values.squeeze() + return dict(features=features, labels=labels) + + df_train, df_valid, df_test = dataset.prepare( + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, + ) + train_dataset, valid_dataset, test_dataset = ( + _prepare_dataset(df_train), + _prepare_dataset(df_valid), + _prepare_dataset(df_test), + ) + # df_train['feature']['CLOSE1'].values + # train_dataset['features'][:, -1] + masks = ~np.isnan(train_dataset["labels"]) + self._mean, self._std = np.mean(train_dataset["labels"][masks]), np.std(train_dataset["labels"][masks]) + train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"]) + valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"]) + self.logger.info("Training MSE loss: {:}".format(train_mse_loss)) + self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss)) + self.fitted = True + + def predict(self, dataset): + if not self.fitted: + raise ValueError("The model is not fitted yet!") + x_test = dataset.prepare("test", col_set="feature") + index = x_test.index + + preds = self.model(self.process_data(x_test.values)) + return pd.Series(preds, index=index) diff --git a/lib/trade_models/naive_model.py b/lib/trade_models/naive_v2_model.py similarity index 81% rename from lib/trade_models/naive_model.py rename to lib/trade_models/naive_v2_model.py index eda1340..3d1a6bf 100755 --- a/lib/trade_models/naive_model.py +++ b/lib/trade_models/naive_v2_model.py @@ -17,8 +17,8 @@ from qlib.data.dataset import DatasetH from qlib.data.dataset.handler import DataHandlerLP -class NAIVE(Model): - """NAIVE Quant Model""" +class NAIVE_V2(Model): + """NAIVE Version 2 Quant Model""" def __init__(self, d_feat=6, seed=None, **kwargs): # Set logger. @@ -29,8 +29,7 @@ class NAIVE(Model): self.d_feat = d_feat self.seed = seed - self.logger.info( - "NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) + self.logger.info("NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) if self.seed is not None: random.seed(self.seed) @@ -46,7 +45,7 @@ class NAIVE(Model): def mse(self, preds, labels): masks = ~np.isnan(labels) masked_preds = preds[masks] - masked_labels= labels[masks] + masked_labels = labels[masks] return np.square(masked_preds - masked_labels).mean() def model(self, x): @@ -54,17 +53,14 @@ class NAIVE(Model): masks = ~np.isnan(x) results = [] for rowd, rowm in zip(x, masks): - temp = rowd[rowm] - if rowm.any(): - results.append(float(rowd[rowm][-1])) - else: - results.append(0) + temp = rowd[rowm] + if rowm.any(): + results.append(float(rowd[rowm][-1])) + else: + results.append(0) return np.array(results, dtype=x.dtype) - def fit( - self, - dataset: DatasetH - ): + def fit(self, dataset: DatasetH): def _prepare_dataset(df_data): features = df_data["feature"].values features = self.process_data(features) @@ -83,8 +79,8 @@ class NAIVE(Model): ) # df_train['feature']['CLOSE1'].values # train_dataset['features'][:, -1] - train_mse_loss = self.mse(self.model(train_dataset['features']), train_dataset['labels']) - valid_mse_loss = self.mse(self.model(valid_dataset['features']), valid_dataset['labels']) + train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"]) + valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"]) self.logger.info("Training MSE loss: {:}".format(train_mse_loss)) self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss)) self.fitted = True diff --git a/scripts/trade/baseline.sh b/scripts/trade/baseline.sh index 8b48326..05a0de8 100644 --- a/scripts/trade/baseline.sh +++ b/scripts/trade/baseline.sh @@ -16,7 +16,7 @@ fi gpu=$1 market=$2 -algorithms="NAIVE MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE" +algorithms="NAIVE-V1 NAIVE-V2 MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE" for alg in ${algorithms} do