Upgrade spaces and add more tests
		| @@ -1,6 +1,8 @@ | ||||
| ################################################## | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021 # | ||||
| ################################################## | ||||
| # Use noise as prediction                        # | ||||
| ################################################## | ||||
| from __future__ import division | ||||
| from __future__ import print_function | ||||
|  | ||||
| @@ -27,7 +29,11 @@ class NAIVE_V1(Model): | ||||
|         self.d_feat = d_feat | ||||
|         self.seed = seed | ||||
|  | ||||
|         self.logger.info("NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) | ||||
|         self.logger.info( | ||||
|             "NAIVE-V1 parameters setting: d_feat={:}, seed={:}".format( | ||||
|                 self.d_feat, self.seed | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         if self.seed is not None: | ||||
|             random.seed(self.seed) | ||||
| @@ -49,7 +55,9 @@ class NAIVE_V1(Model): | ||||
|  | ||||
|     def model(self, x): | ||||
|         num = len(x) | ||||
|         return np.random.normal(loc=self._mean, scale=self._std, size=num).astype(x.dtype) | ||||
|         return np.random.normal(loc=self._mean, scale=self._std, size=num).astype( | ||||
|             x.dtype | ||||
|         ) | ||||
|  | ||||
|     def fit(self, dataset: DatasetH): | ||||
|         def _prepare_dataset(df_data): | ||||
| @@ -71,9 +79,15 @@ class NAIVE_V1(Model): | ||||
|         # df_train['feature']['CLOSE1'].values | ||||
|         # train_dataset['features'][:, -1] | ||||
|         masks = ~np.isnan(train_dataset["labels"]) | ||||
|         self._mean, self._std = np.mean(train_dataset["labels"][masks]), np.std(train_dataset["labels"][masks]) | ||||
|         train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"]) | ||||
|         valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"]) | ||||
|         self._mean, self._std = np.mean(train_dataset["labels"][masks]), np.std( | ||||
|             train_dataset["labels"][masks] | ||||
|         ) | ||||
|         train_mse_loss = self.mse( | ||||
|             self.model(train_dataset["features"]), train_dataset["labels"] | ||||
|         ) | ||||
|         valid_mse_loss = self.mse( | ||||
|             self.model(valid_dataset["features"]), valid_dataset["labels"] | ||||
|         ) | ||||
|         self.logger.info("Training MSE loss: {:}".format(train_mse_loss)) | ||||
|         self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss)) | ||||
|         self.fitted = True | ||||
|   | ||||
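For reference, a minimal standalone sketch of the noise-as-prediction baseline reformatted above: fit the label mean/std while ignoring NaN entries, then emit i.i.d. Gaussian samples as "predictions". The helper names below are hypothetical and do not follow qlib's Model API; only the statistics/sampling logic mirrors the hunks.

# Minimal standalone sketch of the "noise as prediction" baseline above.
# fit_stats / predict_noise are hypothetical helpers, not part of qlib's Model API.
import numpy as np


def fit_stats(labels):
    """Estimate the label mean/std, ignoring NaN entries."""
    masks = ~np.isnan(labels)
    return float(np.mean(labels[masks])), float(np.std(labels[masks]))


def predict_noise(features, mean, std, seed=0):
    """Predict i.i.d. Gaussian noise with the fitted label statistics."""
    rng = np.random.default_rng(seed)
    return rng.normal(loc=mean, scale=std, size=len(features)).astype(features.dtype)


labels = np.array([0.01, np.nan, -0.02, 0.03], dtype=np.float32)
features = np.zeros((4, 360), dtype=np.float32)
mean, std = fit_stats(labels)
preds = predict_noise(features, mean, std)
print(preds.shape)  # (4,)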
| @@ -29,7 +29,11 @@ class NAIVE_V2(Model): | ||||
|         self.d_feat = d_feat | ||||
|         self.seed = seed | ||||
|  | ||||
|         self.logger.info("NAIVE parameters setting: d_feat={:}, seed={:}".format(self.d_feat, self.seed)) | ||||
|         self.logger.info( | ||||
|             "NAIVE parameters setting: d_feat={:}, seed={:}".format( | ||||
|                 self.d_feat, self.seed | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         if self.seed is not None: | ||||
|             random.seed(self.seed) | ||||
| @@ -79,8 +83,12 @@ class NAIVE_V2(Model): | ||||
|         ) | ||||
|         # df_train['feature']['CLOSE1'].values | ||||
|         # train_dataset['features'][:, -1] | ||||
|         train_mse_loss = self.mse(self.model(train_dataset["features"]), train_dataset["labels"]) | ||||
|         valid_mse_loss = self.mse(self.model(valid_dataset["features"]), valid_dataset["labels"]) | ||||
|         train_mse_loss = self.mse( | ||||
|             self.model(train_dataset["features"]), train_dataset["labels"] | ||||
|         ) | ||||
|         valid_mse_loss = self.mse( | ||||
|             self.model(valid_dataset["features"]), valid_dataset["labels"] | ||||
|         ) | ||||
|         self.logger.info("Training MSE loss: {:}".format(train_mse_loss)) | ||||
|         self.logger.info("Validation MSE loss: {:}".format(valid_mse_loss)) | ||||
|         self.fitted = True | ||||
|   | ||||
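In the fit code above, the NaN mask is only applied when estimating the label statistics; if the mse helper does not mask NaN labels itself, the reported loss would come out as NaN. A sketch of an MSE that skips NaN labels (hypothetical helper name, assuming plain NumPy arrays):

# Hypothetical masked-MSE helper, assuming NumPy arrays where NaN marks a missing label.
import numpy as np


def masked_mse(preds, labels):
    """Mean squared error over entries whose label is not NaN."""
    mask = ~np.isnan(labels)
    diff = preds[mask] - labels[mask]
    return float(np.mean(diff * diff))


print(masked_mse(np.array([0.0, 1.0, 2.0]), np.array([0.0, np.nan, 1.0])))  # 0.5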
| @@ -37,14 +37,22 @@ from qlib.data.dataset.handler import DataHandlerLP | ||||
|  | ||||
|  | ||||
| DEFAULT_OPT_CONFIG = dict( | ||||
|     epochs=200, lr=0.001, batch_size=2000, early_stop=20, loss="mse", optimizer="adam", num_workers=4 | ||||
|     epochs=200, | ||||
|     lr=0.001, | ||||
|     batch_size=2000, | ||||
|     early_stop=20, | ||||
|     loss="mse", | ||||
|     optimizer="adam", | ||||
|     num_workers=4, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class QuantTransformer(Model): | ||||
|     """Transformer-based Quant Model""" | ||||
|  | ||||
|     def __init__(self, net_config=None, opt_config=None, metric="", GPU=0, seed=None, **kwargs): | ||||
|     def __init__( | ||||
|         self, net_config=None, opt_config=None, metric="", GPU=0, seed=None, **kwargs | ||||
|     ): | ||||
|         # Set logger. | ||||
|         self.logger = get_module_logger("QuantTransformer") | ||||
|         self.logger.info("QuantTransformer PyTorch version...") | ||||
| @@ -53,7 +61,9 @@ class QuantTransformer(Model): | ||||
|         self.net_config = net_config or DEFAULT_NET_CONFIG | ||||
|         self.opt_config = opt_config or DEFAULT_OPT_CONFIG | ||||
|         self.metric = metric | ||||
|         self.device = torch.device("cuda:{:}".format(GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") | ||||
|         self.device = torch.device( | ||||
|             "cuda:{:}".format(GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu" | ||||
|         ) | ||||
|         self.seed = seed | ||||
|  | ||||
|         self.logger.info( | ||||
| @@ -84,11 +94,17 @@ class QuantTransformer(Model): | ||||
|         self.logger.info("model size: {:.3f} MB".format(count_parameters(self.model))) | ||||
|  | ||||
|         if self.opt_config["optimizer"] == "adam": | ||||
|             self.train_optimizer = optim.Adam(self.model.parameters(), lr=self.opt_config["lr"]) | ||||
|             self.train_optimizer = optim.Adam( | ||||
|                 self.model.parameters(), lr=self.opt_config["lr"] | ||||
|             ) | ||||
|         elif self.opt_config["optimizer"] == "sgd": | ||||
|             self.train_optimizer = optim.SGD(self.model.parameters(), lr=self.opt_config["lr"]) | ||||
|             self.train_optimizer = optim.SGD( | ||||
|                 self.model.parameters(), lr=self.opt_config["lr"] | ||||
|             ) | ||||
|         else: | ||||
|             raise NotImplementedError("optimizer {:} is not supported!".format(optimizer)) | ||||
|             raise NotImplementedError( | ||||
|                 "optimizer {:} is not supported!".format(self.opt_config["optimizer"]) | ||||
|             ) | ||||
|  | ||||
|         self.fitted = False | ||||
|         self.model.to(self.device) | ||||
| @@ -111,7 +127,9 @@ class QuantTransformer(Model): | ||||
|         else: | ||||
|             raise ValueError("unknown metric `{:}`".format(self.metric)) | ||||
|  | ||||
|     def train_or_test_epoch(self, xloader, model, loss_fn, metric_fn, is_train, optimizer=None): | ||||
|     def train_or_test_epoch( | ||||
|         self, xloader, model, loss_fn, metric_fn, is_train, optimizer=None | ||||
|     ): | ||||
|         if is_train: | ||||
|             model.train() | ||||
|         else: | ||||
| @@ -173,7 +191,11 @@ class QuantTransformer(Model): | ||||
|         ) | ||||
|  | ||||
|         save_dir = get_or_create_path(save_dir, return_dir=True) | ||||
|         self.logger.info("Fit procedure for [{:}] with save path={:}".format(self.__class__.__name__, save_dir)) | ||||
|         self.logger.info( | ||||
|             "Fit procedure for [{:}] with save path={:}".format( | ||||
|                 self.__class__.__name__, save_dir | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         def _internal_test(ckp_epoch=None, results_dict=None): | ||||
|             with torch.no_grad(): | ||||
| @@ -186,8 +208,10 @@ class QuantTransformer(Model): | ||||
|                 test_loss, test_score = self.train_or_test_epoch( | ||||
|                     test_loader, self.model, self.loss_fn, self.metric_fn, False, None | ||||
|                 ) | ||||
|                 xstr = "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}".format( | ||||
|                     train_score, valid_score, test_score | ||||
|                 xstr = ( | ||||
|                     "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}".format( | ||||
|                         train_score, valid_score, test_score | ||||
|                     ) | ||||
|                 ) | ||||
|                 if ckp_epoch is not None and isinstance(results_dict, dict): | ||||
|                     results_dict["train"][ckp_epoch] = train_score | ||||
| @@ -199,18 +223,26 @@ class QuantTransformer(Model): | ||||
|         ckp_path = os.path.join(save_dir, "{:}.pth".format(self.__class__.__name__)) | ||||
|         if os.path.exists(ckp_path): | ||||
|             ckp_data = torch.load(ckp_path) | ||||
|             stop_steps, best_score, best_epoch = ckp_data['stop_steps'], ckp_data['best_score'], ckp_data['best_epoch'] | ||||
|             start_epoch, best_param = ckp_data['start_epoch'], ckp_data['best_param'] | ||||
|             results_dict = ckp_data['results_dict'] | ||||
|             self.model.load_state_dict(ckp_data['net_state_dict']) | ||||
|             self.train_optimizer.load_state_dict(ckp_data['opt_state_dict']) | ||||
|             stop_steps, best_score, best_epoch = ( | ||||
|                 ckp_data["stop_steps"], | ||||
|                 ckp_data["best_score"], | ||||
|                 ckp_data["best_epoch"], | ||||
|             ) | ||||
|             start_epoch, best_param = ckp_data["start_epoch"], ckp_data["best_param"] | ||||
|             results_dict = ckp_data["results_dict"] | ||||
|             self.model.load_state_dict(ckp_data["net_state_dict"]) | ||||
|             self.train_optimizer.load_state_dict(ckp_data["opt_state_dict"]) | ||||
|             self.logger.info("Resume from existing checkpoint: {:}".format(ckp_path)) | ||||
|         else: | ||||
|             stop_steps, best_score, best_epoch = 0, -np.inf, -1 | ||||
|             start_epoch, best_param = 0, None | ||||
|             results_dict = dict(train=OrderedDict(), valid=OrderedDict(), test=OrderedDict()) | ||||
|             results_dict = dict( | ||||
|                 train=OrderedDict(), valid=OrderedDict(), test=OrderedDict() | ||||
|             ) | ||||
|             _, eval_str = _internal_test(-1, results_dict) | ||||
|             self.logger.info("Training from scratch, metrics@start: {:}".format(eval_str)) | ||||
|             self.logger.info( | ||||
|                 "Training from scratch, metrics@start: {:}".format(eval_str) | ||||
|             ) | ||||
|  | ||||
|         for iepoch in range(start_epoch, self.opt_config["epochs"]): | ||||
|             self.logger.info( | ||||
| @@ -219,20 +251,35 @@ class QuantTransformer(Model): | ||||
|                 ) | ||||
|             ) | ||||
|             train_loss, train_score = self.train_or_test_epoch( | ||||
|                 train_loader, self.model, self.loss_fn, self.metric_fn, True, self.train_optimizer | ||||
|                 train_loader, | ||||
|                 self.model, | ||||
|                 self.loss_fn, | ||||
|                 self.metric_fn, | ||||
|                 True, | ||||
|                 self.train_optimizer, | ||||
|             ) | ||||
|             self.logger.info( | ||||
|                 "Training :: loss={:.6f}, score={:.6f}".format(train_loss, train_score) | ||||
|             ) | ||||
|             self.logger.info("Training :: loss={:.6f}, score={:.6f}".format(train_loss, train_score)) | ||||
|  | ||||
|             current_eval_scores, eval_str = _internal_test(iepoch, results_dict) | ||||
|             self.logger.info("Evaluating :: {:}".format(eval_str)) | ||||
|  | ||||
|             if current_eval_scores["valid"] > best_score: | ||||
|                 stop_steps, best_epoch, best_score = 0, iepoch, current_eval_scores["valid"] | ||||
|                 stop_steps, best_epoch, best_score = ( | ||||
|                     0, | ||||
|                     iepoch, | ||||
|                     current_eval_scores["valid"], | ||||
|                 ) | ||||
|                 best_param = copy.deepcopy(self.model.state_dict()) | ||||
|             else: | ||||
|                 stop_steps += 1 | ||||
|                 if stop_steps >= self.opt_config["early_stop"]: | ||||
|                     self.logger.info("early stop at {:}-th epoch, where the best is @{:}".format(iepoch, best_epoch)) | ||||
|                     self.logger.info( | ||||
|                         "early stop at {:}-th epoch, where the best is @{:}".format( | ||||
|                             iepoch, best_epoch | ||||
|                         ) | ||||
|                     ) | ||||
|                     break | ||||
|             save_info = dict( | ||||
|                 net_config=self.net_config, | ||||
| @@ -247,9 +294,11 @@ class QuantTransformer(Model): | ||||
|                 start_epoch=iepoch + 1, | ||||
|             ) | ||||
|             torch.save(save_info, ckp_path) | ||||
|         self.logger.info("The best score: {:.6f} @ {:02d}-th epoch".format(best_score, best_epoch)) | ||||
|         self.logger.info( | ||||
|             "The best score: {:.6f} @ {:02d}-th epoch".format(best_score, best_epoch) | ||||
|         ) | ||||
|         self.model.load_state_dict(best_param) | ||||
|         _, eval_str = _internal_test('final', results_dict) | ||||
|         _, eval_str = _internal_test("final", results_dict) | ||||
|         self.logger.info("Reload the best parameter :: {:}".format(eval_str)) | ||||
|  | ||||
|         if self.use_gpu: | ||||
|   | ||||
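A sketch of the checkpoint round-trip used by the fit procedure above: the checkpoint is a plain dict holding the network/optimizer state plus the early-stop bookkeeping, written with torch.save and restored with torch.load. The tiny Linear model and the reduced set of fields are placeholders; the real checkpoint also carries net_config, opt_config, best_param, and results_dict.

# Sketch of the checkpoint round-trip in fit(); the Linear model is a placeholder.
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)
optimizer = optim.Adam(model.parameters(), lr=0.001)

save_info = dict(
    net_state_dict=model.state_dict(),
    opt_state_dict=optimizer.state_dict(),
    best_score=0.123,
    best_epoch=5,
    stop_steps=0,
    start_epoch=6,
)
torch.save(save_info, "checkpoint.pth")

ckp_data = torch.load("checkpoint.pth")
model.load_state_dict(ckp_data["net_state_dict"])
optimizer.load_state_dict(ckp_data["opt_state_dict"])
print("resume training at epoch", ckp_data["start_epoch"])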
| @@ -33,7 +33,15 @@ DEFAULT_NET_CONFIG = dict( | ||||
|  | ||||
|  | ||||
| class Attention(nn.Module): | ||||
|     def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): | ||||
|     def __init__( | ||||
|         self, | ||||
|         dim, | ||||
|         num_heads=8, | ||||
|         qkv_bias=False, | ||||
|         qk_scale=None, | ||||
|         attn_drop=0.0, | ||||
|         proj_drop=0.0, | ||||
|     ): | ||||
|         super(Attention, self).__init__() | ||||
|         self.num_heads = num_heads | ||||
|         head_dim = dim // num_heads | ||||
| @@ -46,8 +54,16 @@ class Attention(nn.Module): | ||||
|  | ||||
|     def forward(self, x): | ||||
|         B, N, C = x.shape | ||||
|         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) | ||||
|         q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple) | ||||
|         qkv = ( | ||||
|             self.qkv(x) | ||||
|             .reshape(B, N, 3, self.num_heads, C // self.num_heads) | ||||
|             .permute(2, 0, 3, 1, 4) | ||||
|         ) | ||||
|         q, k, v = ( | ||||
|             qkv[0], | ||||
|             qkv[1], | ||||
|             qkv[2], | ||||
|         )  # make torchscript happy (cannot use tensor as tuple) | ||||
|  | ||||
|         attn = (q @ k.transpose(-2, -1)) * self.scale | ||||
|         attn = attn.softmax(dim=-1) | ||||
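A standalone shape check of the qkv reshape/permute in Attention.forward above, using small made-up dimensions; only the tensor manipulation is taken from the hunk, and the Linear projection here is a stand-in for self.qkv.

# Standalone shape check of the qkv reshape/permute in Attention.forward above.
import torch
import torch.nn as nn

B, N, C, num_heads = 2, 5, 16, 4             # batch, tokens, channels, heads
qkv_proj = nn.Linear(C, C * 3, bias=False)   # stand-in for self.qkv
x = torch.randn(B, N, C)

qkv = qkv_proj(x).reshape(B, N, 3, num_heads, C // num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2]
print(q.shape)  # torch.Size([2, 4, 5, 4]) -> (batch, heads, tokens, head_dim)

scale = (C // num_heads) ** -0.5
attn = (q @ k.transpose(-2, -1)) * scale
print(attn.shape)  # torch.Size([2, 4, 5, 5]) -> pairwise token attention per head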
| @@ -76,13 +92,25 @@ class Block(nn.Module): | ||||
|         super(Block, self).__init__() | ||||
|         self.norm1 = norm_layer(dim) | ||||
|         self.attn = Attention( | ||||
|             dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=mlp_drop | ||||
|             dim, | ||||
|             num_heads=num_heads, | ||||
|             qkv_bias=qkv_bias, | ||||
|             qk_scale=qk_scale, | ||||
|             attn_drop=attn_drop, | ||||
|             proj_drop=mlp_drop, | ||||
|         ) | ||||
|         # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here | ||||
|         self.drop_path = xlayers.DropPath(drop_path) if drop_path > 0.0 else nn.Identity() | ||||
|         self.drop_path = ( | ||||
|             xlayers.DropPath(drop_path) if drop_path > 0.0 else nn.Identity() | ||||
|         ) | ||||
|         self.norm2 = norm_layer(dim) | ||||
|         mlp_hidden_dim = int(dim * mlp_ratio) | ||||
|         self.mlp = xlayers.MLP(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=mlp_drop) | ||||
|         self.mlp = xlayers.MLP( | ||||
|             in_features=dim, | ||||
|             hidden_features=mlp_hidden_dim, | ||||
|             act_layer=act_layer, | ||||
|             drop=mlp_drop, | ||||
|         ) | ||||
|  | ||||
|     def forward(self, x): | ||||
|         x = x + self.drop_path(self.attn(self.norm1(x))) | ||||
| @@ -144,9 +172,13 @@ class TransformerModel(nn.Module): | ||||
|         self.input_embed = SimpleEmbed(d_feat, embed_dim=embed_dim) | ||||
|  | ||||
|         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) | ||||
|         self.pos_embed = xlayers.PositionalEncoder(d_model=embed_dim, max_seq_len=max_seq_len, dropout=pos_drop) | ||||
|         self.pos_embed = xlayers.PositionalEncoder( | ||||
|             d_model=embed_dim, max_seq_len=max_seq_len, dropout=pos_drop | ||||
|         ) | ||||
|  | ||||
|         dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule | ||||
|         dpr = [ | ||||
|             x.item() for x in torch.linspace(0, drop_path_rate, depth) | ||||
|         ]  # stochastic depth decay rule | ||||
|         self.blocks = nn.ModuleList( | ||||
|             [ | ||||
|                 Block( | ||||
| @@ -184,7 +216,9 @@ class TransformerModel(nn.Module): | ||||
|         batch, flatten_size = x.shape | ||||
|         feats = self.input_embed(x)  # batch * 60 * 64 | ||||
|  | ||||
|         cls_tokens = self.cls_token.expand(batch, -1, -1)  # stole cls_tokens impl from Phil Wang, thanks | ||||
|         cls_tokens = self.cls_token.expand( | ||||
|             batch, -1, -1 | ||||
|         )  # stole cls_tokens impl from Phil Wang, thanks | ||||
|         feats_w_ct = torch.cat((cls_tokens, feats), dim=1) | ||||
|         feats_w_tp = self.pos_embed(feats_w_ct) | ||||
|  | ||||
|   | ||||
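Two details from the TransformerModel hunks above, shown as a standalone sketch with made-up sizes: the stochastic-depth decay rule spreads drop-path rates linearly from 0 to drop_path_rate across the blocks, and the learnable cls token is broadcast over the batch and prepended to the sequence before positional encoding.

# Sketch of the stochastic-depth decay rule and the cls-token broadcast, with made-up sizes.
import torch
import torch.nn as nn

depth, drop_path_rate, embed_dim = 4, 0.2, 8
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
print(dpr)  # roughly [0.0, 0.067, 0.133, 0.2]: later blocks get a higher drop-path rate

batch, seq_len = 3, 6
cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
feats = torch.randn(batch, seq_len, embed_dim)
cls_tokens = cls_token.expand(batch, -1, -1)        # (3, 1, 8), broadcast without copying
feats_w_ct = torch.cat((cls_tokens, feats), dim=1)  # (3, 7, 8), cls token prepended
print(feats_w_ct.shape)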