103 lines
3.4 KiB
Python
103 lines
3.4 KiB
Python
##################################################
|
|
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021 #
|
|
##################################################
|
|
# Use noise as prediction #
|
|
##################################################
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import random
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from qlib.log import get_module_logger
|
|
|
|
from qlib.model.base import Model
|
|
from qlib.data.dataset import DatasetH
|
|
from qlib.data.dataset.handler import DataHandlerLP
|
|
|
|
|
|
class NAIVE_V1(Model):
    """NAIVE Version 1 Quant Model.

    A noise baseline: ``fit`` estimates the mean and standard deviation of the
    non-NaN training labels, and every prediction is an independent draw from
    the normal distribution with those two parameters.  The input features
    only determine how many predictions are produced and their dtype.
    """

    def __init__(self, d_feat=6, seed=None, **kwargs):
        """Set up the logger, hyper-parameters and the (unfitted) state.

        Args:
            d_feat: size of the feature axis used by ``process_data`` when it
                reshapes the flat feature matrix.
            seed: when not ``None``, seeds the global ``random`` and
                ``numpy.random`` generators.
            **kwargs: accepted for interface compatibility and ignored.
        """
        # Set logger.
        self.logger = get_module_logger("NAIVE")
        self.logger.info("NAIVE 1st version: random noise ...")

        # set hyper-parameters.
        self.d_feat = d_feat
        self.seed = seed

        self.logger.info(
            "NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format(
                self.d_feat, self.seed
            )
        )

        if self.seed is not None:
            # NOTE: this seeds the process-wide generators, not a private one.
            random.seed(self.seed)
            np.random.seed(self.seed)

        # Label statistics; populated by ``fit``.
        self._mean = None
        self._std = None
        self.fitted = False

    def process_data(self, features):
        """Reshape a flat feature matrix and slice out a 2-D view.

        Args:
            features: array whose first axis indexes samples and whose
                remaining size is divisible by ``self.d_feat``.

        Returns:
            The array reshaped to ``(len(features), d_feat, -1)``, transposed
            to ``(len(features), -1, d_feat)``, then sliced to at most the
            first 59 entries of axis 1 at index 0 of the last axis.
        """
        features = features.reshape(len(features), self.d_feat, -1)
        features = features.transpose((0, 2, 1))
        # NOTE(review): 59 is hard-coded; presumably tied to the look-back
        # window of the feature handler -- confirm against the data config.
        return features[:, :59, 0]

    def mse(self, preds, labels):
        """Return the mean squared error over entries whose label is not NaN."""
        masks = ~np.isnan(labels)
        masked_preds = preds[masks]
        masked_labels = labels[masks]
        return np.square(masked_preds - masked_labels).mean()

    def model(self, x):
        """Draw one normal sample per row of ``x``, cast to ``x.dtype``.

        Raises:
            ValueError: if the label statistics have not been estimated yet.
        """
        if self._mean is None or self._std is None:
            # Fail with the same message ``predict`` uses instead of letting
            # numpy reject ``loc=None`` with an unrelated error.
            raise ValueError("The model is not fitted yet!")
        num = len(x)
        return np.random.normal(loc=self._mean, scale=self._std, size=num).astype(
            x.dtype
        )

    def fit(self, dataset: DatasetH):
        """Estimate the label mean/std on the training segment.

        Also logs the MSE of freshly sampled noise against the training and
        validation labels.  Only the ``"train"`` and ``"valid"`` segments are
        requested from ``dataset``.

        Raises:
            ValueError: if the training segment has no non-NaN label.
        """

        def _prepare_dataset(df_data):
            features = df_data["feature"].values
            features = self.process_data(features)
            labels = df_data["label"].values.squeeze()
            return dict(features=features, labels=labels)

        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        train_dataset = _prepare_dataset(df_train)
        valid_dataset = _prepare_dataset(df_valid)

        train_labels = train_dataset["labels"]
        observed_labels = train_labels[~np.isnan(train_labels)]
        if observed_labels.size == 0:
            # Otherwise mean/std would be NaN and every prediction silently NaN.
            raise ValueError("The training segment has no non-NaN labels to fit on.")
        self._mean, self._std = np.mean(observed_labels), np.std(observed_labels)

        train_mse_loss = self.mse(
            self.model(train_dataset["features"]), train_dataset["labels"]
        )
        valid_mse_loss = self.mse(
            self.model(valid_dataset["features"]), valid_dataset["labels"]
        )
        self.logger.info("Training MSE loss: {:}".format(train_mse_loss))
        self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss))
        self.fitted = True

    def predict(self, dataset, segment="test"):
        """Return noise predictions for one segment of ``dataset``.

        Args:
            dataset: object whose ``prepare(segment, col_set="feature")``
                returns a frame exposing ``.index`` and ``.values``.
            segment: segment passed to ``dataset.prepare``; defaults to
                ``"test"``.

        Returns:
            A ``pandas.Series`` of sampled values indexed like the prepared
            feature frame.

        Raises:
            ValueError: if ``fit`` has not completed.
        """
        if not self.fitted:
            raise ValueError("The model is not fitted yet!")
        x_test = dataset.prepare(segment, col_set="feature")
        index = x_test.index

        preds = self.model(self.process_data(x_test.values))
        return pd.Series(preds, index=index)
|