diff --git a/exps/trading/baselines.py b/exps/trading/baselines.py
index 27b6e24..e81ce01 100644
--- a/exps/trading/baselines.py
+++ b/exps/trading/baselines.py
@@ -15,8 +15,8 @@
 # python exps/trading/baselines.py --alg TabNet      #
 #                                                    #
 # python exps/trading/baselines.py --alg Transformer #
-# python exps/trading/baselines.py --alg TSF         #
-# python exps/trading/baselines.py --alg TSF-4x64-d0
+# python exps/trading/baselines.py --alg TSF
+# python exps/trading/baselines.py --alg TSF-4x64-drop0_0
 #####################################################
 import sys
 import copy
@@ -40,10 +40,11 @@
 from qlib.workflow import R
 from qlib.utils import flatten_dict


-def to_pos_drop(config, value):
+def to_drop(config, pos_drop, other_drop):
     config = copy.deepcopy(config)
     net = config["task"]["model"]["kwargs"]["net_config"]
-    net["pos_drop"] = value
+    net["pos_drop"] = pos_drop
+    net["other_drop"] = other_drop
     return config

@@ -59,11 +60,12 @@ def to_layer(config, embed_dim, depth):

 def extend_transformer_settings(alg2configs, name):
     config = copy.deepcopy(alg2configs[name])
     for i in range(1, 7):
-        for j in [6, 12, 24, 32, 48, 64]:
-            for k in [0, 0.1]:
-                alg2configs[name + "-{:}x{:}-d{:}".format(i, j, k)] = to_layer(
-                    to_pos_drop(config, k), j, i
-                )
+        for j in (6, 12, 24, 32, 48, 64):
+            for k1 in (0, 0.1, 0.2):
+                for k2 in (0, 0.1):
+                    alg2configs[
+                        name + "-{:}x{:}-drop{:}_{:}".format(i, j, k1, k2)
+                    ] = to_layer(to_drop(config, k1, k2), j, i)
     return alg2configs

diff --git a/lib/procedures/q_exps.py b/lib/procedures/q_exps.py
index 4eb8c31..47c5a73 100644
--- a/lib/procedures/q_exps.py
+++ b/lib/procedures/q_exps.py
@@ -107,7 +107,7 @@ def run_exp(
         model = R.load_object(model_obj_name)
         logger.info("[Find existing object from {:}]".format(model_obj_name))
     except OSError:
-        # R.log_params(**flatten_dict(task_config))
+        R.log_params(**flatten_dict(update_gpu(task_config, None)))
         if "save_path" in inspect.getfullargspec(model.fit).args:
             model_fit_kwargs["save_path"] = os.path.join(
                 recorder_root_dir, "model.ckp"
             )
@@ -126,9 +126,6 @@
             else:
                 R.save_objects(**{model_obj_name: model})
         except Exception as e:
-            import pdb
-
-            pdb.set_trace()
             raise ValueError("Something wrong: {:}".format(e))
     # Get the recorder
     recorder = R.get_recorder()
diff --git a/lib/trade_models/quant_transformer.py b/lib/trade_models/quant_transformer.py
index fec729e..c29e62b 100644
--- a/lib/trade_models/quant_transformer.py
+++ b/lib/trade_models/quant_transformer.py
@@ -45,6 +45,32 @@ DEFAULT_OPT_CONFIG = dict(
 )


+def train_or_test_epoch(
+    xloader, model, loss_fn, metric_fn, is_train, optimizer, device
+):
+    if is_train:
+        model.train()
+    else:
+        model.eval()
+    score_meter, loss_meter = AverageMeter(), AverageMeter()
+    for ibatch, (feats, labels) in enumerate(xloader):
+        feats, labels = feats.to(device), labels.to(device)
+        # forward the network
+        preds = model(feats)
+        loss = loss_fn(preds, labels)
+        with torch.no_grad():
+            score = metric_fn(preds, labels)
+        loss_meter.update(loss.item(), feats.size(0))
+        score_meter.update(score.item(), feats.size(0))
+        # optimize the network
+        if is_train and optimizer is not None:
+            optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
+            optimizer.step()
+    return loss_meter.avg, score_meter.avg
+
+
 class QuantTransformer(Model):
     """Transformer-based Quant Model"""

@@ -132,32 +158,6 @@
         else:
             raise ValueError("unknown metric `{:}`".format(self.metric))

-    def train_or_test_epoch(
-        self, xloader, model, loss_fn, metric_fn, is_train, optimizer=None
-    ):
-        if is_train:
-            model.train()
-        else:
-            model.eval()
-        score_meter, loss_meter = AverageMeter(), AverageMeter()
-        for ibatch, (feats, labels) in enumerate(xloader):
-            feats = feats.to(self.device, non_blocking=True)
-            labels = labels.to(self.device, non_blocking=True)
-            # forward the network
-            preds = model(feats)
-            loss = loss_fn(preds, labels)
-            with torch.no_grad():
-                score = self.metric_fn(preds, labels)
-            loss_meter.update(loss.item(), feats.size(0))
-            score_meter.update(score.item(), feats.size(0))
-            # optimize the network
-            if is_train and optimizer is not None:
-                optimizer.zero_grad()
-                loss.backward()
-                torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
-                optimizer.step()
-        return loss_meter.avg, score_meter.avg
-
     def fit(
         self,
         dataset: DatasetH,
@@ -204,14 +204,22 @@

         def _internal_test(ckp_epoch=None, results_dict=None):
             with torch.no_grad():
-                train_loss, train_score = self.train_or_test_epoch(
-                    train_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                shared_kwargs = {
+                    "model": self.model,
+                    "loss_fn": self.loss_fn,
+                    "metric_fn": self.metric_fn,
+                    "is_train": False,
+                    "optimizer": None,
+                    "device": self.device,
+                }
+                train_loss, train_score = train_or_test_epoch(
+                    train_loader, **shared_kwargs
                 )
-                valid_loss, valid_score = self.train_or_test_epoch(
-                    valid_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                valid_loss, valid_score = train_or_test_epoch(
+                    valid_loader, **shared_kwargs
                 )
-                test_loss, test_score = self.train_or_test_epoch(
-                    test_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                test_loss, test_score = train_or_test_epoch(
+                    test_loader, **shared_kwargs
                 )
                 xstr = (
                     "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}".format(
@@ -255,13 +263,14 @@
                     iepoch, self.opt_config["epochs"], best_epoch, best_score
                 )
             )
-            train_loss, train_score = self.train_or_test_epoch(
+            train_loss, train_score = train_or_test_epoch(
                 train_loader,
                 self.model,
                 self.loss_fn,
                 self.metric_fn,
                 True,
                 self.train_optimizer,
+                self.device,
             )
             self.logger.info(
                 "Training :: loss={:.6f}, score={:.6f}".format(train_loss, train_score)
@@ -307,7 +316,8 @@
             self.logger.info("Reload the best parameter :: {:}".format(eval_str))

         if self.use_gpu:
-            torch.cuda.empty_cache()
+            with torch.cuda.device(self.device):
+                torch.cuda.empty_cache()
         self.fitted = True

     def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
diff --git a/lib/xlayers/super_linear.py b/lib/xlayers/super_linear.py
index 4155bf6..5d2f005 100644
--- a/lib/xlayers/super_linear.py
+++ b/lib/xlayers/super_linear.py
@@ -30,11 +30,14 @@ class SuperLinear(SuperModule):
         self._out_features = out_features
         self._bias = bias
         # weights to be optimized
-        self._super_weight = torch.nn.Parameter(
-            torch.Tensor(self.out_features, self.in_features)
-        )
+        self.register_parameter(
+            "_super_weight",
+            torch.nn.Parameter(torch.Tensor(self.out_features, self.in_features)),
+        )
         if self.bias:
-            self._super_bias = torch.nn.Parameter(torch.Tensor(self.out_features))
+            self.register_parameter(
+                "_super_bias", torch.nn.Parameter(torch.Tensor(self.out_features))
+            )
         else:
             self.register_parameter("_super_bias", None)
         self.reset_parameters()
diff --git a/lib/xlayers/super_norm.py b/lib/xlayers/super_norm.py
index 0b1d38f..4e3b0d8 100644
--- a/lib/xlayers/super_norm.py
+++ b/lib/xlayers/super_norm.py
@@ -25,8 +25,8 @@
         self._eps = eps
         self._elementwise_affine = elementwise_affine
         if self._elementwise_affine:
-            self.weight = nn.Parameter(torch.Tensor(self.in_dim))
-            self.bias = nn.Parameter(torch.Tensor(self.in_dim))
+            self.register_parameter("weight", nn.Parameter(torch.Tensor(self.in_dim)))
+            self.register_parameter("bias", nn.Parameter(torch.Tensor(self.in_dim)))
         else:
             self.register_parameter("weight", None)
             self.register_parameter("bias", None)
diff --git a/scripts/trade/tsf-all.sh b/scripts/trade/tsf-all.sh
index 323dda5..821966a 100644
--- a/scripts/trade/tsf-all.sh
+++ b/scripts/trade/tsf-all.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf-all.sh 0 csi300 0
-# bash scripts/trade/tsf-all.sh 0 csi300 0.1
+# bash scripts/trade/tsf-all.sh 0 csi300 0_0
+# bash scripts/trade/tsf-all.sh 0 csi300 0.1_0
 # bash scripts/trade/tsf-all.sh 1 all
 #
 set -e
@@ -24,6 +24,6 @@ for channel in ${channels}
 do
     for depth in ${depths}
     do
-        python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+        python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}
     done
 done
diff --git a/scripts/trade/tsf.sh b/scripts/trade/tsf.sh
index 3f6f3fb..2757c50 100644
--- a/scripts/trade/tsf.sh
+++ b/scripts/trade/tsf.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf.sh 0 csi300 3 0
-# bash scripts/trade/tsf.sh 0 csi300 3 0.1
-# bash scripts/trade/tsf.sh 1 csi100 3
-# bash scripts/trade/tsf.sh 1 all 3
+# bash scripts/trade/tsf.sh 0 csi300 3 0_0
+# bash scripts/trade/tsf.sh 0 csi300 3 0.1_0
+# bash scripts/trade/tsf.sh 1 csi100 3 0.2_0
+# bash scripts/trade/tsf.sh 1 all 3 0.1_0
 #
 set -e
 echo script name: $0
@@ -24,6 +24,6 @@ channels="6 12 24 32 48 64"

 for channel in ${channels}
 do
-    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}
 done
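---
Notes (editorial sketches; illustrative, not part of the patch):

1. The baselines.py hunks replace the single dropout knob with a
   (pos_drop, other_drop) pair and rename the generated configs from
   "TSF-{depth}x{dim}-d{drop}" to "TSF-{depth}x{dim}-drop{pos}_{other}".
   Mirroring the loops in extend_transformer_settings shows the size of the
   resulting sweep:

       for i in range(1, 7):                  # depth
           for j in (6, 12, 24, 32, 48, 64):  # embedding dimension
               for k1 in (0, 0.1, 0.2):       # pos_drop
                   for k2 in (0, 0.1):        # other_drop
                       print("TSF-{:}x{:}-drop{:}_{:}".format(i, j, k1, k2))
       # 6 * 6 * 3 * 2 = 216 names, e.g. "TSF-4x64-drop0_0", matching the
       # identifiers that the updated tsf.sh / tsf-all.sh scripts pass in.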
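2. The quant_transformer.py hunks hoist train_or_test_epoch out of
   QuantTransformer into a module-level function that takes its device
   explicitly (and drop the old method's reliance on self.metric_fn in favor
   of the metric_fn argument). A minimal usage sketch with toy stand-ins --
   the import path assumes lib/ is on PYTHONPATH, and the Linear model,
   random loader, and MSE metric are placeholders for the repository's Qlib
   setup:

       import torch

       from trade_models.quant_transformer import train_or_test_epoch

       model = torch.nn.Linear(16, 1)
       loader = [(torch.randn(8, 16), torch.randn(8, 1)) for _ in range(4)]
       loss_fn = torch.nn.functional.mse_loss
       metric_fn = torch.nn.functional.mse_loss
       optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
       device = torch.device("cpu")

       # Training epoch: gradients flow; clip_grad_value_ caps them at +/-3.0.
       train_loss, train_score = train_or_test_epoch(
           loader, model, loss_fn, metric_fn, True, optimizer, device
       )

       # Evaluation epoch: is_train=False selects eval mode and skips the
       # optimizer; callers in the patch also wrap this in torch.no_grad().
       with torch.no_grad():
           valid_loss, valid_score = train_or_test_epoch(
               loader, model, loss_fn, metric_fn, False, None, device
           )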
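3. The super_linear.py / super_norm.py hunks swap plain attribute assignment
   for explicit register_parameter calls. For real parameters the two forms
   are equivalent (nn.Module.__setattr__ already routes nn.Parameter
   assignments into the module's parameter dict); the explicit call simply
   mirrors the register_parameter(..., None) branch used when a parameter is
   absent. A small self-contained demonstration with a toy module (not from
   the repository):

       import torch
       import torch.nn as nn

       class Tiny(nn.Module):
           def __init__(self, dim, affine=True):
               super().__init__()
               if affine:
                   # Explicit registration, as in the patch ...
                   self.register_parameter("weight", nn.Parameter(torch.ones(dim)))
                   # ... behaves the same as direct assignment:
                   self.bias = nn.Parameter(torch.zeros(dim))
               else:
                   # Only register_parameter can record an absent parameter.
                   self.register_parameter("weight", None)
                   self.register_parameter("bias", None)

       print([n for n, _ in Tiny(4).named_parameters()])
       # -> ['weight', 'bias']
       print([n for n, _ in Tiny(4, affine=False).named_parameters()])
       # -> []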