Fix CUDA memory issues
commit 9fc2c991f5
parent 5fb900bcb1
@@ -15,8 +15,8 @@
 # python exps/trading/baselines.py --alg TabNet #
 # #
 # python exps/trading/baselines.py --alg Transformer#
-# python exps/trading/baselines.py --alg TSF #
-# python exps/trading/baselines.py --alg TSF-4x64-d0
+# python exps/trading/baselines.py --alg TSF
+# python exps/trading/baselines.py --alg TSF-4x64-drop0_0
 #####################################################
 import sys
 import copy
@@ -40,10 +40,11 @@ from qlib.workflow import R
 from qlib.utils import flatten_dict


-def to_pos_drop(config, value):
+def to_drop(config, pos_drop, other_drop):
     config = copy.deepcopy(config)
     net = config["task"]["model"]["kwargs"]["net_config"]
-    net["pos_drop"] = value
+    net["pos_drop"] = pos_drop
+    net["other_drop"] = other_drop
     return config

@@ -59,11 +60,12 @@ def to_layer(config, embed_dim, depth):
 def extend_transformer_settings(alg2configs, name):
     config = copy.deepcopy(alg2configs[name])
     for i in range(1, 7):
-        for j in [6, 12, 24, 32, 48, 64]:
-            for k in [0, 0.1]:
-                alg2configs[name + "-{:}x{:}-d{:}".format(i, j, k)] = to_layer(
-                    to_pos_drop(config, k), j, i
-                )
+        for j in (6, 12, 24, 32, 48, 64):
+            for k1 in (0, 0.1, 0.2):
+                for k2 in (0, 0.1):
+                    alg2configs[
+                        name + "-{:}x{:}-drop{:}_{:}".format(i, j, k1, k2)
+                    ] = to_layer(to_drop(config, k1, k2), j, i)
     return alg2configs

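Note: a hedged illustration of what the nested loop above registers. The `to_layer` / `to_drop` helpers and the config plumbing are the real ones from this file, but the snippet below only reproduces the naming scheme, assuming the base algorithm is registered under the name "TSF" (the name the trade scripts further down use).

# Illustration only: enumerate the variant names generated by
# extend_transformer_settings, without the qlib config machinery.
names = []
for depth in range(1, 7):
    for embed_dim in (6, 12, 24, 32, 48, 64):
        for pos_drop in (0, 0.1, 0.2):
            for other_drop in (0, 0.1):
                names.append(
                    "TSF-{:}x{:}-drop{:}_{:}".format(depth, embed_dim, pos_drop, other_drop)
                )
print(len(names))  # 6 * 6 * 3 * 2 = 216 variants
print(names[0])    # TSF-1x6-drop0_0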
@@ -107,7 +107,7 @@ def run_exp(
         model = R.load_object(model_obj_name)
         logger.info("[Find existing object from {:}]".format(model_obj_name))
     except OSError:
-        # R.log_params(**flatten_dict(task_config))
+        R.log_params(**flatten_dict(update_gpu(task_config, None)))
         if "save_path" in inspect.getfullargspec(model.fit).args:
             model_fit_kwargs["save_path"] = os.path.join(
                 recorder_root_dir, "model.ckp"
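Note: `update_gpu` is defined elsewhere in baselines.py and does not appear in this diff. The sketch below is a hypothetical stand-in that only conveys the intent of logging the task config with its GPU entry blanked out; the real helper and the exact key layout may differ.

import copy

def update_gpu_sketch(task_config, gpu):
    # Hypothetical helper (not the repo's update_gpu): deep-copy the config and
    # overwrite the model's GPU entry so the logged parameters do not pin a device.
    config = copy.deepcopy(task_config)
    kwargs = config.get("task", {}).get("model", {}).get("kwargs", {})
    if "GPU" in kwargs:
        kwargs["GPU"] = gpu
    return config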
@@ -126,9 +126,6 @@ def run_exp(
         else:
             R.save_objects(**{model_obj_name: model})
     except Exception as e:
-        import pdb
-
-        pdb.set_trace()
         raise ValueError("Something wrong: {:}".format(e))
     # Get the recorder
     recorder = R.get_recorder()

@@ -45,6 +45,32 @@ DEFAULT_OPT_CONFIG = dict(
 )


+def train_or_test_epoch(
+    xloader, model, loss_fn, metric_fn, is_train, optimizer, device
+):
+    if is_train:
+        model.train()
+    else:
+        model.eval()
+    score_meter, loss_meter = AverageMeter(), AverageMeter()
+    for ibatch, (feats, labels) in enumerate(xloader):
+        feats, labels = feats.to(device), labels.to(device)
+        # forward the network
+        preds = model(feats)
+        loss = loss_fn(preds, labels)
+        with torch.no_grad():
+            score = metric_fn(preds, labels)
+        loss_meter.update(loss.item(), feats.size(0))
+        score_meter.update(score.item(), feats.size(0))
+        # optimize the network
+        if is_train and optimizer is not None:
+            optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
+            optimizer.step()
+    return loss_meter.avg, score_meter.avg
+
+
 class QuantTransformer(Model):
     """Transformer-based Quant Model"""

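Note: a toy sketch of driving the new free-standing `train_or_test_epoch`, mirroring the `shared_kwards` pattern used later in `fit`. It assumes this function and the repo's `AverageMeter` are importable; the model, data, loss, and metric are made up for illustration.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

model = nn.Linear(8, 1)  # toy stand-in for the Transformer net
loader = DataLoader(TensorDataset(torch.randn(64, 8), torch.randn(64, 1)), batch_size=16)
loss_fn = nn.MSELoss()
metric_fn = lambda preds, labels: -loss_fn(preds, labels)  # "higher is better" toy metric
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

shared_kwards = {  # same keyword-reuse idea as in QuantTransformer.fit below
    "model": model, "loss_fn": loss_fn, "metric_fn": metric_fn,
    "is_train": False, "optimizer": None, "device": device,
}
with torch.no_grad():
    eval_loss, eval_score = train_or_test_epoch(loader, **shared_kwards)
train_loss, train_score = train_or_test_epoch(
    loader, model, loss_fn, metric_fn, True, optimizer, device
)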
@@ -132,32 +158,6 @@ class QuantTransformer(Model):
         else:
             raise ValueError("unknown metric `{:}`".format(self.metric))

-    def train_or_test_epoch(
-        self, xloader, model, loss_fn, metric_fn, is_train, optimizer=None
-    ):
-        if is_train:
-            model.train()
-        else:
-            model.eval()
-        score_meter, loss_meter = AverageMeter(), AverageMeter()
-        for ibatch, (feats, labels) in enumerate(xloader):
-            feats = feats.to(self.device, non_blocking=True)
-            labels = labels.to(self.device, non_blocking=True)
-            # forward the network
-            preds = model(feats)
-            loss = loss_fn(preds, labels)
-            with torch.no_grad():
-                score = self.metric_fn(preds, labels)
-            loss_meter.update(loss.item(), feats.size(0))
-            score_meter.update(score.item(), feats.size(0))
-            # optimize the network
-            if is_train and optimizer is not None:
-                optimizer.zero_grad()
-                loss.backward()
-                torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
-                optimizer.step()
-        return loss_meter.avg, score_meter.avg
-
     def fit(
         self,
         dataset: DatasetH,
@@ -204,14 +204,22 @@ class QuantTransformer(Model):

         def _internal_test(ckp_epoch=None, results_dict=None):
             with torch.no_grad():
-                train_loss, train_score = self.train_or_test_epoch(
-                    train_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                shared_kwards = {
+                    "model": self.model,
+                    "loss_fn": self.loss_fn,
+                    "metric_fn": self.metric_fn,
+                    "is_train": False,
+                    "optimizer": None,
+                    "device": self.device,
+                }
+                train_loss, train_score = train_or_test_epoch(
+                    train_loader, **shared_kwards
                 )
-                valid_loss, valid_score = self.train_or_test_epoch(
-                    valid_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                valid_loss, valid_score = train_or_test_epoch(
+                    valid_loader, **shared_kwards
                 )
-                test_loss, test_score = self.train_or_test_epoch(
-                    test_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                test_loss, test_score = train_or_test_epoch(
+                    test_loader, **shared_kwards
                 )
                 xstr = (
                     "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}".format(
@@ -255,13 +263,14 @@ class QuantTransformer(Model):
                     iepoch, self.opt_config["epochs"], best_epoch, best_score
                 )
             )
-            train_loss, train_score = self.train_or_test_epoch(
+            train_loss, train_score = train_or_test_epoch(
                 train_loader,
                 self.model,
                 self.loss_fn,
                 self.metric_fn,
                 True,
                 self.train_optimizer,
+                self.device,
             )
             self.logger.info(
                 "Training :: loss={:.6f}, score={:.6f}".format(train_loss, train_score)
@@ -307,7 +316,8 @@ class QuantTransformer(Model):
         self.logger.info("Reload the best parameter :: {:}".format(eval_str))

         if self.use_gpu:
-            torch.cuda.empty_cache()
+            with torch.cuda.device(self.device):
+                torch.cuda.empty_cache()
         self.fitted = True

     def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):

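Note: the hunk above is the commit's namesake fix. `torch.cuda.device(...)` is a context manager that temporarily makes the given device the current CUDA device, so `empty_cache()` now runs with the model's own device selected rather than whatever device happens to be current (typically cuda:0). A minimal sketch of the same pattern outside the repo (device choice and tensor are illustrative):

import torch

def release_cuda_cache(device):
    # Run empty_cache() with `device` selected as the current CUDA device;
    # a no-op on machines without CUDA.
    if torch.cuda.is_available() and torch.device(device).type == "cuda":
        with torch.cuda.device(device):
            torch.cuda.empty_cache()

if torch.cuda.is_available():
    dev = torch.device("cuda:0")             # illustrative device choice
    x = torch.randn(2048, 2048, device=dev)
    del x                                    # the tensor is freed, but its blocks stay cached
    release_cuda_cache(dev)                  # return cached blocks to the driver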
@@ -30,11 +30,14 @@ class SuperLinear(SuperModule):
         self._out_features = out_features
         self._bias = bias
         # weights to be optimized
-        self._super_weight = torch.nn.Parameter(
-            torch.Tensor(self.out_features, self.in_features)
+        self.register_parameter(
+            "_super_weight",
+            torch.nn.Parameter(torch.Tensor(self.out_features, self.in_features)),
         )
         if self.bias:
-            self._super_bias = torch.nn.Parameter(torch.Tensor(self.out_features))
+            self.register_parameter(
+                "_super_bias", torch.nn.Parameter(torch.Tensor(self.out_features))
+            )
         else:
             self.register_parameter("_super_bias", None)
         self.reset_parameters()

@@ -25,8 +25,8 @@ class SuperLayerNorm1D(SuperModule):
         self._eps = eps
         self._elementwise_affine = elementwise_affine
         if self._elementwise_affine:
-            self.weight = nn.Parameter(torch.Tensor(self.in_dim))
-            self.bias = nn.Parameter(torch.Tensor(self.in_dim))
+            self.register_parameter("weight", nn.Parameter(torch.Tensor(self.in_dim)))
+            self.register_parameter("bias", nn.Parameter(torch.Tensor(self.in_dim)))
         else:
             self.register_parameter("weight", None)
             self.register_parameter("bias", None)

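Note: in both Super* modules the parameters were already being registered, because assigning an `nn.Parameter` to a module attribute registers it implicitly; `register_parameter` states the same thing explicitly and is also the way to record a deliberately absent parameter as `None`, as the `else` branches do. A small self-contained sketch of the pattern (module name and fields are illustrative):

import torch
import torch.nn as nn

class TinyAffine(nn.Module):
    def __init__(self, dim: int, affine: bool = True):
        super().__init__()
        if affine:
            self.register_parameter("weight", nn.Parameter(torch.ones(dim)))
            self.register_parameter("bias", nn.Parameter(torch.zeros(dim)))
        else:
            self.register_parameter("weight", None)  # explicitly "no parameter"
            self.register_parameter("bias", None)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.weight is None:
            return x
        return x * self.weight + self.bias

m = TinyAffine(4)
print([name for name, _ in m.named_parameters()])  # ['weight', 'bias']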
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf-all.sh 0 csi300 0
-# bash scripts/trade/tsf-all.sh 0 csi300 0.1
+# bash scripts/trade/tsf-all.sh 0 csi300 0_0
+# bash scripts/trade/tsf-all.sh 0 csi300 0.1_0
 # bash scripts/trade/tsf-all.sh 1 all
 #
 set -e
@@ -24,6 +24,6 @@ for channel in ${channels}
 do
     for depth in ${depths}
     do
-        python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+        python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}
     done
 done

@@ -1,9 +1,9 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf.sh 0 csi300 3 0
-# bash scripts/trade/tsf.sh 0 csi300 3 0.1
-# bash scripts/trade/tsf.sh 1 csi100 3
-# bash scripts/trade/tsf.sh 1 all 3
+# bash scripts/trade/tsf.sh 0 csi300 3 0_0
+# bash scripts/trade/tsf.sh 0 csi300 3 0.1_0
+# bash scripts/trade/tsf.sh 1 csi100 3 0.2_0
+# bash scripts/trade/tsf.sh 1 all 3 0.1_0
 #
 set -e
 echo script name: $0
@@ -24,6 +24,6 @@ channels="6 12 24 32 48 64"
 for channel in ${channels}
 do

-    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}

 done