Add baselines
This commit is contained in:
		 Submodule .latent-data/qlib updated: 88b0871c12...aa552fdb20
									
								
							| @@ -30,8 +30,8 @@ port_analysis_config: &port_analysis_config | ||||
|         min_cost: 5 | ||||
| task: | ||||
|     model: | ||||
|         class: NAIVE | ||||
|         module_path: trade_models.naive_model | ||||
|         class: NAIVE_V1 | ||||
|         module_path: trade_models.naive_v1_model | ||||
|         kwargs: | ||||
|             d_feat: 6 | ||||
|     dataset: | ||||
							
								
								
									
										64
									
								
								configs/qlib/workflow_config_naive_v2_Alpha360.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								configs/qlib/workflow_config_naive_v2_Alpha360.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
# Workflow configuration that runs the NAIVE_V2 model on the Alpha360 data handler.
| qlib_init: | ||||
|     provider_uri: "~/.qlib/qlib_data/cn_data" | ||||
|     region: cn | ||||
# Anchors defined here are reused below through the *market and *benchmark aliases.
| market: &market all | ||||
| benchmark: &benchmark SH000300 | ||||
# Keyword arguments for the data handler; referenced as *data_handler_config under task.dataset.
| data_handler_config: &data_handler_config | ||||
|     start_time: 2008-01-01 | ||||
|     end_time: 2020-08-01 | ||||
|     fit_start_time: 2008-01-01 | ||||
|     fit_end_time: 2014-12-31 | ||||
|     instruments: *market | ||||
|     infer_processors: [] | ||||
|     learn_processors: [] | ||||
# NOTE(review): the label looks like a forward close-to-close return; confirm against qlib's Ref() semantics.
|     label: ["Ref($close, -2) / Ref($close, -1) - 1"] | ||||
# Strategy and backtest settings; handed to PortAnaRecord at the end of the file via *port_analysis_config.
| port_analysis_config: &port_analysis_config | ||||
|     strategy: | ||||
|         class: TopkDropoutStrategy | ||||
|         module_path: qlib.contrib.strategy.strategy | ||||
|         kwargs: | ||||
|             topk: 50 | ||||
|             n_drop: 5 | ||||
|     backtest: | ||||
|         verbose: False | ||||
|         limit_threshold: 0.095 | ||||
|         account: 100000000 | ||||
|         benchmark: *benchmark | ||||
|         deal_price: close | ||||
|         open_cost: 0.0005 | ||||
|         close_cost: 0.0015 | ||||
|         min_cost: 5 | ||||
# Model, dataset and record definitions for the task.
| task: | ||||
|     model: | ||||
|         class: NAIVE_V2 | ||||
|         module_path: trade_models.naive_v2_model | ||||
|         kwargs: | ||||
|             d_feat: 6 | ||||
|     dataset: | ||||
|         class: DatasetH | ||||
|         module_path: qlib.data.dataset | ||||
|         kwargs: | ||||
|             handler: | ||||
|                 class: Alpha360 | ||||
|                 module_path: qlib.contrib.data.handler | ||||
|                 kwargs: *data_handler_config | ||||
# The train range below equals fit_start_time / fit_end_time in data_handler_config.
|             segments: | ||||
|                 train: [2008-01-01, 2014-12-31] | ||||
|                 valid: [2015-01-01, 2016-12-31] | ||||
|                 test: [2017-01-01, 2020-08-01] | ||||
# Record classes configured for this task, each with its own kwargs.
|     record:  | ||||
|         - class: SignalRecord | ||||
|           module_path: qlib.workflow.record_temp | ||||
|           kwargs: {} | ||||
|         - class: SignalMseRecord | ||||
|           module_path: qlib.contrib.workflow.record_temp | ||||
|           kwargs: {} | ||||
|         - class: SigAnaRecord | ||||
|           module_path: qlib.workflow.record_temp | ||||
|           kwargs:  | ||||
|             ana_long_short: False | ||||
|             ann_scaler: 252 | ||||
|         - class: PortAnaRecord | ||||
|           module_path: qlib.workflow.record_temp | ||||
|           kwargs:  | ||||
|             config: *port_analysis_config | ||||
| @@ -5,7 +5,8 @@ | ||||
| # python exps/trading/baselines.py --alg GRU        # | ||||
| # python exps/trading/baselines.py --alg LSTM       # | ||||
| # python exps/trading/baselines.py --alg ALSTM      # | ||||
| # python exps/trading/baselines.py --alg NAIVE      # | ||||
| # python exps/trading/baselines.py --alg NAIVE-V1   # | ||||
| # python exps/trading/baselines.py --alg NAIVE-V2   # | ||||
| #                                                   # | ||||
| # python exps/trading/baselines.py --alg SFM        # | ||||
| # python exps/trading/baselines.py --alg XGBoost    # | ||||
| @@ -53,7 +54,8 @@ def retrieve_configs(): | ||||
|     # DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis, https://arxiv.org/pdf/2010.01265.pdf | ||||
|     alg2names["DoubleE"] = "workflow_config_doubleensemble_Alpha360.yaml" | ||||
|     alg2names["TabNet"] = "workflow_config_TabNet_Alpha360.yaml" | ||||
|     alg2names["NAIVE"] = "workflow_config_naive_Alpha360.yaml" | ||||
|     alg2names["NAIVE-V1"] = "workflow_config_naive_v1_Alpha360.yaml" | ||||
|     alg2names["NAIVE-V2"] = "workflow_config_naive_v2_Alpha360.yaml" | ||||
|  | ||||
|     # find the yaml paths | ||||
|     alg2paths = OrderedDict() | ||||
|   | ||||
							
								
								
									
										88
									
								
								lib/trade_models/naive_v1_model.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										88
									
								
								lib/trade_models/naive_v1_model.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| ################################################## | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021 # | ||||
| ################################################## | ||||
| from __future__ import division | ||||
| from __future__ import print_function | ||||
|  | ||||
| import random | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
|  | ||||
| from qlib.log import get_module_logger | ||||
|  | ||||
| from qlib.model.base import Model | ||||
| from qlib.data.dataset import DatasetH | ||||
| from qlib.data.dataset.handler import DataHandlerLP | ||||
|  | ||||
|  | ||||
class NAIVE_V1(Model):
    """NAIVE Version 1 Quant Model.

    A random-noise baseline: ``fit`` records the mean and standard deviation
    of the non-NaN training labels, and every prediction is a draw from a
    normal distribution with those two parameters. The input features only
    determine how many predictions are produced.
    """

    def __init__(self, d_feat=6, seed=None, **kwargs):
        """Store the hyper-parameters and optionally seed the global RNGs.

        Args:
            d_feat: size of the axis that ``process_data`` splits the flat
                feature matrix into.
            seed: when not ``None``, passed to both ``random.seed`` and
                ``np.random.seed``.
            **kwargs: accepted for configuration compatibility and ignored.
        """
        # Set logger.
        self.logger = get_module_logger("NAIVE")
        self.logger.info("NAIVE 1st version: random noise ...")

        # set hyper-parameters.
        self.d_feat = d_feat
        self.seed = seed

        self.logger.info("NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed))

        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)
        # Label statistics; filled in by ``fit``.
        self._mean = None
        self._std = None
        self.fitted = False

    def process_data(self, features):
        """Reshape flat features and keep one channel.

        The 2-D input is viewed as ``(len(features), d_feat, -1)``, the last
        two axes are swapped, and index 0 of the last axis is returned for the
        first 59 positions of the middle axis.
        """
        features = features.reshape(len(features), self.d_feat, -1)
        features = features.transpose((0, 2, 1))
        return features[:, :59, 0]

    def mse(self, preds, labels):
        """Return the mean squared error over entries whose label is not NaN."""
        masks = ~np.isnan(labels)
        masked_preds = preds[masks]
        masked_labels = labels[masks]
        return np.square(masked_preds - masked_labels).mean()

    def model(self, x):
        """Draw ``len(x)`` normal samples using the fitted mean and std, cast to ``x.dtype``."""
        num = len(x)
        return np.random.normal(loc=self._mean, scale=self._std, size=num).astype(x.dtype)

    def fit(self, dataset: DatasetH):
        """Estimate the label mean/std on the train split and log train/valid MSE.

        Args:
            dataset: object whose ``prepare`` returns frames exposing
                ``"feature"`` and ``"label"`` column groups.
        """

        def _prepare_dataset(df_data):
            features = df_data["feature"].values
            features = self.process_data(features)
            labels = df_data["label"].values.squeeze()
            return dict(features=features, labels=labels)

        # Only the train and valid splits are used below, so the test split is
        # no longer loaded and pre-processed here.
        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        train_dataset = _prepare_dataset(df_train)
        valid_dataset = _prepare_dataset(df_valid)

        # Statistics are computed on the non-NaN training labels only.
        train_labels = train_dataset["labels"]
        observed_labels = train_labels[~np.isnan(train_labels)]
        self._mean, self._std = np.mean(observed_labels), np.std(observed_labels)

        train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"])
        valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"])
        self.logger.info("Training MSE loss: {:}".format(train_mse_loss))
        self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss))
        self.fitted = True

    def predict(self, dataset):
        """Return random predictions for the test split as a ``pd.Series``.

        The series is indexed like the prepared test features.

        Raises:
            ValueError: if ``fit`` has not been called.
        """
        if not self.fitted:
            raise ValueError("The model is not fitted yet!")
        x_test = dataset.prepare("test", col_set="feature")
        index = x_test.index

        preds = self.model(self.process_data(x_test.values))
        return pd.Series(preds, index=index)
| @@ -17,8 +17,8 @@ from qlib.data.dataset import DatasetH | ||||
| from qlib.data.dataset.handler import DataHandlerLP | ||||
| 
 | ||||
| 
 | ||||
| class NAIVE(Model): | ||||
|     """NAIVE Quant Model""" | ||||
| class NAIVE_V2(Model): | ||||
|     """NAIVE Version 2 Quant Model""" | ||||
| 
 | ||||
|     def __init__(self, d_feat=6, seed=None, **kwargs): | ||||
|         # Set logger. | ||||
| @@ -29,8 +29,7 @@ class NAIVE(Model): | ||||
|         self.d_feat = d_feat | ||||
|         self.seed = seed | ||||
| 
 | ||||
|         self.logger.info( | ||||
|             "NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) | ||||
|         self.logger.info("NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) | ||||
| 
 | ||||
|         if self.seed is not None: | ||||
|             random.seed(self.seed) | ||||
| @@ -46,7 +45,7 @@ class NAIVE(Model): | ||||
|     def mse(self, preds, labels): | ||||
|         masks = ~np.isnan(labels) | ||||
|         masked_preds = preds[masks] | ||||
|         masked_labels= labels[masks] | ||||
|         masked_labels = labels[masks] | ||||
|         return np.square(masked_preds - masked_labels).mean() | ||||
| 
 | ||||
|     def model(self, x): | ||||
| @@ -54,17 +53,14 @@ class NAIVE(Model): | ||||
|         masks = ~np.isnan(x) | ||||
|         results = [] | ||||
|         for rowd, rowm in zip(x, masks): | ||||
|           temp = rowd[rowm] | ||||
|           if rowm.any(): | ||||
|             results.append(float(rowd[rowm][-1])) | ||||
|           else: | ||||
|             results.append(0) | ||||
|             temp = rowd[rowm] | ||||
|             if rowm.any(): | ||||
|                 results.append(float(rowd[rowm][-1])) | ||||
|             else: | ||||
|                 results.append(0) | ||||
|         return np.array(results, dtype=x.dtype) | ||||
| 
 | ||||
|     def fit( | ||||
|         self, | ||||
|         dataset: DatasetH | ||||
|     ): | ||||
|     def fit(self, dataset: DatasetH): | ||||
|         def _prepare_dataset(df_data): | ||||
|             features = df_data["feature"].values | ||||
|             features = self.process_data(features) | ||||
| @@ -83,8 +79,8 @@ class NAIVE(Model): | ||||
|         ) | ||||
|         # df_train['feature']['CLOSE1'].values | ||||
|         # train_dataset['features'][:, -1] | ||||
|         train_mse_loss = self.mse(self.model(train_dataset['features']), train_dataset['labels']) | ||||
|         valid_mse_loss = self.mse(self.model(valid_dataset['features']), valid_dataset['labels']) | ||||
|         train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"]) | ||||
|         valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"]) | ||||
|         self.logger.info("Training MSE loss: {:}".format(train_mse_loss)) | ||||
|         self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss)) | ||||
|         self.fitted = True | ||||
| @@ -16,7 +16,7 @@ fi | ||||
| gpu=$1 | ||||
| market=$2 | ||||
|  | ||||
| algorithms="NAIVE MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE" | ||||
| algorithms="NAIVE-V1 NAIVE-V2 MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE" | ||||
|  | ||||
| for alg in ${algorithms} | ||||
| do | ||||
|   | ||||
		Reference in New Issue
	
	Block a user