Add baselines

D-X-Y 2021-03-17 03:32:47 +00:00
parent f6cbac706f
commit e04f17116d
7 changed files with 172 additions and 22 deletions

@@ -1 +1 @@
-Subproject commit 88b0871c12d0b139da489c53e02444606f6ca634
+Subproject commit aa552fdb2089cf5b4396a6b75191d2c13211b42d


@@ -30,8 +30,8 @@ port_analysis_config: &port_analysis_config
         min_cost: 5
 task:
     model:
-        class: NAIVE
-        module_path: trade_models.naive_model
+        class: NAIVE_V1
+        module_path: trade_models.naive_v1_model
         kwargs:
             d_feat: 6
     dataset:


@@ -0,0 +1,64 @@
qlib_init:
    provider_uri: "~/.qlib/qlib_data/cn_data"
    region: cn
market: &market all
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
    start_time: 2008-01-01
    end_time: 2020-08-01
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
    infer_processors: []
    learn_processors: []
    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy.strategy
        kwargs:
            topk: 50
            n_drop: 5
    backtest:
        verbose: False
        limit_threshold: 0.095
        account: 100000000
        benchmark: *benchmark
        deal_price: close
        open_cost: 0.0005
        close_cost: 0.0015
        min_cost: 5
task:
    model:
        class: NAIVE_V2
        module_path: trade_models.naive_v2_model
        kwargs:
            d_feat: 6
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
                train: [2008-01-01, 2014-12-31]
                valid: [2015-01-01, 2016-12-31]
                test: [2017-01-01, 2020-08-01]
    record:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs: {}
        - class: SignalMseRecord
          module_path: qlib.contrib.workflow.record_temp
          kwargs: {}
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              ana_long_short: False
              ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              config: *port_analysis_config
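Side note (not part of the commit): a minimal Python sketch of how a workflow YAML like the one above is typically turned into live objects with qlib. The filename is an assumption for illustration, and the trade_models package must be importable for the NAIVE_V2 class to resolve; the loading itself uses qlib's init_instance_by_config helper.

# Minimal sketch, not from the commit: load the workflow YAML above and
# instantiate its task objects with qlib. The YAML filename is assumed.
import qlib
import yaml
from qlib.utils import init_instance_by_config

with open("workflow_config_naive_v2_Alpha360.yaml") as f:  # assumed filename
    config = yaml.safe_load(f)

qlib.init(**config["qlib_init"])  # points at ~/.qlib/qlib_data/cn_data, region cn

dataset = init_instance_by_config(config["task"]["dataset"])  # DatasetH + Alpha360 handler
model = init_instance_by_config(config["task"]["model"])      # NAIVE_V2 (trade_models must be on the path)

model.fit(dataset)
predictions = model.predict(dataset)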


@@ -5,7 +5,8 @@
 # python exps/trading/baselines.py --alg GRU      #
 # python exps/trading/baselines.py --alg LSTM     #
 # python exps/trading/baselines.py --alg ALSTM    #
-# python exps/trading/baselines.py --alg NAIVE    #
+# python exps/trading/baselines.py --alg NAIVE-V1 #
+# python exps/trading/baselines.py --alg NAIVE-V2 #
 #                                                 #
 # python exps/trading/baselines.py --alg SFM      #
 # python exps/trading/baselines.py --alg XGBoost  #
@@ -53,7 +54,8 @@ def retrieve_configs():
     # DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis, https://arxiv.org/pdf/2010.01265.pdf
     alg2names["DoubleE"] = "workflow_config_doubleensemble_Alpha360.yaml"
     alg2names["TabNet"] = "workflow_config_TabNet_Alpha360.yaml"
-    alg2names["NAIVE"] = "workflow_config_naive_Alpha360.yaml"
+    alg2names["NAIVE-V1"] = "workflow_config_naive_v1_Alpha360.yaml"
+    alg2names["NAIVE-V2"] = "workflow_config_naive_v2_Alpha360.yaml"

     # find the yaml paths
     alg2paths = OrderedDict()
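For context, a sketch (the config directory name is an assumption, not taken from this hunk) of how the two new keys resolve to YAML paths, mirroring the OrderedDict pattern used by retrieve_configs():

# Illustrative sketch only: map the new baseline names to their config paths.
from collections import OrderedDict
from pathlib import Path

alg2names = OrderedDict()
alg2names["NAIVE-V1"] = "workflow_config_naive_v1_Alpha360.yaml"
alg2names["NAIVE-V2"] = "workflow_config_naive_v2_Alpha360.yaml"

config_dir = Path("configs/qlib")  # assumed location of the workflow YAML files
alg2paths = OrderedDict()
for alg, name in alg2names.items():
    path = config_dir / name
    assert path.exists(), "Cannot find the config file: {:}".format(path)
    alg2paths[alg] = str(path)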


@@ -0,0 +1,88 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021 #
##################################################
from __future__ import division
from __future__ import print_function

import random
import numpy as np
import pandas as pd

from qlib.log import get_module_logger
from qlib.model.base import Model
from qlib.data.dataset import DatasetH
from qlib.data.dataset.handler import DataHandlerLP


class NAIVE_V1(Model):
    """NAIVE Version 1 Quant Model"""

    def __init__(self, d_feat=6, seed=None, **kwargs):
        # Set logger.
        self.logger = get_module_logger("NAIVE")
        self.logger.info("NAIVE 1st version: random noise ...")

        # set hyper-parameters.
        self.d_feat = d_feat
        self.seed = seed

        self.logger.info("NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed))

        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)

        self._mean = None
        self._std = None
        self.fitted = False

    def process_data(self, features):
        features = features.reshape(len(features), self.d_feat, -1)
        features = features.transpose((0, 2, 1))
        return features[:, :59, 0]

    def mse(self, preds, labels):
        masks = ~np.isnan(labels)
        masked_preds = preds[masks]
        masked_labels = labels[masks]
        return np.square(masked_preds - masked_labels).mean()

    def model(self, x):
        num = len(x)
        return np.random.normal(loc=self._mean, scale=self._std, size=num).astype(x.dtype)

    def fit(self, dataset: DatasetH):
        def _prepare_dataset(df_data):
            features = df_data["feature"].values
            features = self.process_data(features)
            labels = df_data["label"].values.squeeze()
            return dict(features=features, labels=labels)

        df_train, df_valid, df_test = dataset.prepare(
            ["train", "valid", "test"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        train_dataset, valid_dataset, test_dataset = (
            _prepare_dataset(df_train),
            _prepare_dataset(df_valid),
            _prepare_dataset(df_test),
        )
        # df_train['feature']['CLOSE1'].values
        # train_dataset['features'][:, -1]
        masks = ~np.isnan(train_dataset["labels"])
        self._mean, self._std = np.mean(train_dataset["labels"][masks]), np.std(train_dataset["labels"][masks])
        train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"])
        valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"])
        self.logger.info("Training MSE loss: {:}".format(train_mse_loss))
        self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss))
        self.fitted = True

    def predict(self, dataset):
        if not self.fitted:
            raise ValueError("The model is not fitted yet!")
        x_test = dataset.prepare("test", col_set="feature")
        index = x_test.index
        preds = self.model(self.process_data(x_test.values))
        return pd.Series(preds, index=index)
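To make the reshape in process_data concrete, a small numpy sketch follows (the 6 fields x 60 past days layout is an assumption inferred from d_feat=6 and the Alpha360 handler, not stated in the commit): the model only ever consumes the first 59 day-steps of the first field per sample.

# Standalone sketch of NAIVE_V1.process_data on fake Alpha360-style input.
import numpy as np

n_samples, d_feat, n_days = 4, 6, 60
# One row per sample, 6 * 60 = 360 feature columns.
features = np.arange(n_samples * d_feat * n_days, dtype=np.float32).reshape(n_samples, d_feat * n_days)

x = features.reshape(len(features), d_feat, -1)  # (4, 6, 60): fields x days
x = x.transpose((0, 2, 1))                       # (4, 60, 6): days x fields
print(x[:, :59, 0].shape)                        # (4, 59): what NAIVE_V1.model() receives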


@@ -17,8 +17,8 @@ from qlib.data.dataset import DatasetH
 from qlib.data.dataset.handler import DataHandlerLP


-class NAIVE(Model):
-    """NAIVE Quant Model"""
+class NAIVE_V2(Model):
+    """NAIVE Version 2 Quant Model"""

     def __init__(self, d_feat=6, seed=None, **kwargs):
         # Set logger.
@@ -29,8 +29,7 @@ class NAIVE(Model):
         self.d_feat = d_feat
         self.seed = seed

-        self.logger.info(
-            "NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed))
+        self.logger.info("NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed))

         if self.seed is not None:
             random.seed(self.seed)
@@ -61,10 +60,7 @@ class NAIVE(Model):
                 results.append(0)
         return np.array(results, dtype=x.dtype)

-    def fit(
-        self,
-        dataset: DatasetH
-    ):
+    def fit(self, dataset: DatasetH):
         def _prepare_dataset(df_data):
             features = df_data["feature"].values
             features = self.process_data(features)
@@ -83,8 +79,8 @@ class NAIVE(Model):
         )
         # df_train['feature']['CLOSE1'].values
         # train_dataset['features'][:, -1]
-        train_mse_loss = self.mse(self.model(train_dataset['features']), train_dataset['labels'])
-        valid_mse_loss = self.mse(self.model(valid_dataset['features']), valid_dataset['labels'])
+        train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"])
+        valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"])
         self.logger.info("Training MSE loss: {:}".format(train_mse_loss))
         self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss))
         self.fitted = True


@@ -16,7 +16,7 @@ fi
 gpu=$1
 market=$2

-algorithms="NAIVE MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE"
+algorithms="NAIVE-V1 NAIVE-V2 MLP GRU LSTM ALSTM XGBoost LightGBM SFM TabNet DoubleE"

 for alg in ${algorithms}
 do