diff --git a/.latent-data/qlib b/.latent-data/qlib index d47e35d..d4aa681 160000 --- a/.latent-data/qlib +++ b/.latent-data/qlib @@ -1 +1 @@ -Subproject commit d47e35d64e274524df3bbafa6b159714a699ccaa +Subproject commit d4aa6816520d306503a1f80c1834b37a9df83c3d diff --git a/configs/qlib/workflow_config_naive_Alpha360.yaml b/configs/qlib/workflow_config_naive_Alpha360.yaml new file mode 100644 index 0000000..13075fa --- /dev/null +++ b/configs/qlib/workflow_config_naive_Alpha360.yaml @@ -0,0 +1,64 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market all +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: [] + learn_processors: [] + label: ["Ref($close, -2) / Ref($close, -1) - 1"] +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: NAIVE + module_path: trade_models.naive_model + kwargs: + d_feat: 6 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha360 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SignalMseRecord + module_path: qlib.contrib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: 
class NAIVE(Model):
    """Naive baseline that reuses the most recently observed price ratio.

    No parameters are learned.  For every sample the model looks at the first
    feature channel over the leading 59 time steps, converts each value ``v``
    to ``1 / v - 1`` and returns the last non-NaN result (``0`` when the whole
    window is NaN).

    NOTE(review): the slicing presumably targets the CLOSE59..CLOSE1 columns
    of the Alpha360 handler (prices normalised by the latest close) -- confirm
    against the handler's column layout.
    """

    def __init__(self, d_feat=6, seed=None, **kwargs):
        """Store the hyper-parameters and optionally seed the global RNGs.

        Args:
            d_feat: number of feature channels packed into each flat row.
            seed: when not ``None``, seeds both ``random`` and ``numpy.random``.
            **kwargs: accepted so shared configs can pass extra keys; ignored.
        """
        # Set logger.
        self.logger = get_module_logger("NAIVE")
        self.logger.info("NAIVE version...")

        # Set hyper-parameters.
        self.d_feat = d_feat
        self.seed = seed

        self.logger.info(
            "NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)
        )

        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)

        self.fitted = False

    def process_data(self, features):
        """Extract the first feature channel over the leading 59 time steps.

        Args:
            features: 2-D array whose rows are reshaped to ``(d_feat, -1)``.

        Returns:
            A 2-D array with one row per sample and at most 59 columns.
        """
        # (N, d_feat * T) -> (N, d_feat, T) -> (N, T, d_feat)
        features = features.reshape(len(features), self.d_feat, -1)
        features = features.transpose((0, 2, 1))
        # Keep channel 0 only and drop every step after the 59th.
        return features[:, :59, 0]

    def mse(self, preds, labels):
        """Return the mean squared error over entries whose label is not NaN.

        Returns NaN when no label is valid, instead of relying on the mean of
        an empty array (which also yields NaN but emits a RuntimeWarning).
        """
        valid = ~np.isnan(labels)
        if not valid.any():
            return float("nan")
        return np.square(preds[valid] - labels[valid]).mean()

    def model(self, x):
        """Predict, per row, the last non-NaN value of ``1 / x - 1``.

        Args:
            x: 2-D array as produced by ``process_data``.

        Returns:
            A 1-D array with one prediction per row; rows without any valid
            entry get ``0``.
        """
        ratios = 1 / x - 1
        valid = ~np.isnan(ratios)
        return np.array(
            [
                float(row[mask][-1]) if mask.any() else 0
                for row, mask in zip(ratios, valid)
            ],
            dtype=ratios.dtype,
        )

    def fit(self, dataset: DatasetH):
        """Log the baseline's MSE on the train and valid segments.

        Nothing is learned; the call only reports the losses and marks the
        model as fitted so that ``predict`` may be used.

        Args:
            dataset: provides the "train" and "valid" segments with the
                "feature" and "label" column sets.
        """

        def _prepare_dataset(df_data):
            # Flat feature frame -> selected window; labels -> 1-D array.
            return dict(
                features=self.process_data(df_data["feature"].values),
                labels=df_data["label"].values.squeeze(),
            )

        # Only the segments that are actually evaluated are prepared; the
        # test segment is not needed here.
        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        train_dataset = _prepare_dataset(df_train)
        valid_dataset = _prepare_dataset(df_valid)

        train_mse_loss = self.mse(
            self.model(train_dataset["features"]), train_dataset["labels"]
        )
        valid_mse_loss = self.mse(
            self.model(valid_dataset["features"]), valid_dataset["labels"]
        )
        self.logger.info("Training MSE loss: {:}".format(train_mse_loss))
        self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss))
        self.fitted = True

    def predict(self, dataset, segment="test"):
        """Return predictions for one segment as a Series on the data's index.

        Args:
            dataset: object whose ``prepare`` yields the feature frame.
            segment: segment to predict on; defaults to "test".

        Raises:
            ValueError: if ``fit`` has not been called yet.
        """
        if not self.fitted:
            raise ValueError("The model is not fitted yet!")
        x_test = dataset.prepare(segment, col_set="feature")
        preds = self.model(self.process_data(x_test.values))
        return pd.Series(preds, index=x_test.index)