Fix CUDA memory issues
commit 9fc2c991f5
parent 5fb900bcb1
@@ -15,8 +15,8 @@
 # python exps/trading/baselines.py --alg TabNet       #
 #                                                     #
 # python exps/trading/baselines.py --alg Transformer  #
-# python exps/trading/baselines.py --alg TSF          #
-# python exps/trading/baselines.py --alg TSF-4x64-d0  #
+# python exps/trading/baselines.py --alg TSF              #
+# python exps/trading/baselines.py --alg TSF-4x64-drop0_0 #
 #######################################################
 import sys
 import copy
@@ -40,10 +40,11 @@ from qlib.workflow import R
 from qlib.utils import flatten_dict


-def to_pos_drop(config, value):
+def to_drop(config, pos_drop, other_drop):
     config = copy.deepcopy(config)
     net = config["task"]["model"]["kwargs"]["net_config"]
-    net["pos_drop"] = value
+    net["pos_drop"] = pos_drop
+    net["other_drop"] = other_drop
     return config


@@ -59,11 +60,12 @@ def to_layer(config, embed_dim, depth):
 def extend_transformer_settings(alg2configs, name):
     config = copy.deepcopy(alg2configs[name])
     for i in range(1, 7):
-        for j in [6, 12, 24, 32, 48, 64]:
-            for k in [0, 0.1]:
-                alg2configs[name + "-{:}x{:}-d{:}".format(i, j, k)] = to_layer(
-                    to_pos_drop(config, k), j, i
-                )
+        for j in (6, 12, 24, 32, 48, 64):
+            for k1 in (0, 0.1, 0.2):
+                for k2 in (0, 0.1):
+                    alg2configs[
+                        name + "-{:}x{:}-drop{:}_{:}".format(i, j, k1, k2)
+                    ] = to_layer(to_drop(config, k1, k2), j, i)
     return alg2configs


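Note: the loops above now sweep both dropout rates, and the algorithm name encodes them as "drop{pos}_{other}". A minimal sketch of the naming scheme (only the format string is taken from the diff; the concrete values are illustrative):

    # depth=4, embed_dim=64, pos_drop=0, other_drop=0
    key = "TSF" + "-{:}x{:}-drop{:}_{:}".format(4, 64, 0, 0)
    print(key)  # -> "TSF-4x64-drop0_0", matching the banner comment above
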
@@ -107,7 +107,7 @@ def run_exp(
        model = R.load_object(model_obj_name)
        logger.info("[Find existing object from {:}]".format(model_obj_name))
    except OSError:
-        # R.log_params(**flatten_dict(task_config))
+        R.log_params(**flatten_dict(update_gpu(task_config, None)))
        if "save_path" in inspect.getfullargspec(model.fit).args:
            model_fit_kwargs["save_path"] = os.path.join(
                recorder_root_dir, "model.ckp"
@@ -126,9 +126,6 @@ def run_exp(
        else:
            R.save_objects(**{model_obj_name: model})
    except Exception as e:
-        import pdb
-
-        pdb.set_trace()
        raise ValueError("Something wrong: {:}".format(e))
    # Get the recorder
    recorder = R.get_recorder()
@@ -45,6 +45,32 @@ DEFAULT_OPT_CONFIG = dict(
 )


+def train_or_test_epoch(
+    xloader, model, loss_fn, metric_fn, is_train, optimizer, device
+):
+    if is_train:
+        model.train()
+    else:
+        model.eval()
+    score_meter, loss_meter = AverageMeter(), AverageMeter()
+    for ibatch, (feats, labels) in enumerate(xloader):
+        feats, labels = feats.to(device), labels.to(device)
+        # forward the network
+        preds = model(feats)
+        loss = loss_fn(preds, labels)
+        with torch.no_grad():
+            score = metric_fn(preds, labels)
+        loss_meter.update(loss.item(), feats.size(0))
+        score_meter.update(score.item(), feats.size(0))
+        # optimize the network
+        if is_train and optimizer is not None:
+            optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
+            optimizer.step()
+    return loss_meter.avg, score_meter.avg
+
+
 class QuantTransformer(Model):
     """Transformer-based Quant Model"""

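Note: train_or_test_epoch is now a module-level function that receives the device explicitly instead of reading self.device, so an epoch run holds no reference to the QuantTransformer instance. A minimal usage sketch (toy model, loader, and metric are illustrative; AverageMeter is assumed to be the repo's meter class):

    import torch

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = torch.nn.Linear(16, 1).to(device)
    loader = [(torch.randn(8, 16), torch.randn(8, 1)) for _ in range(4)]
    optimizer = torch.optim.Adam(model.parameters())
    loss_fn = torch.nn.MSELoss()
    metric_fn = torch.nn.L1Loss()

    # one training pass over the toy loader
    train_loss, train_score = train_or_test_epoch(
        loader, model, loss_fn, metric_fn, True, optimizer, device
    )
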
@@ -132,32 +158,6 @@ class QuantTransformer(Model):
         else:
             raise ValueError("unknown metric `{:}`".format(self.metric))

-    def train_or_test_epoch(
-        self, xloader, model, loss_fn, metric_fn, is_train, optimizer=None
-    ):
-        if is_train:
-            model.train()
-        else:
-            model.eval()
-        score_meter, loss_meter = AverageMeter(), AverageMeter()
-        for ibatch, (feats, labels) in enumerate(xloader):
-            feats = feats.to(self.device, non_blocking=True)
-            labels = labels.to(self.device, non_blocking=True)
-            # forward the network
-            preds = model(feats)
-            loss = loss_fn(preds, labels)
-            with torch.no_grad():
-                score = self.metric_fn(preds, labels)
-            loss_meter.update(loss.item(), feats.size(0))
-            score_meter.update(score.item(), feats.size(0))
-            # optimize the network
-            if is_train and optimizer is not None:
-                optimizer.zero_grad()
-                loss.backward()
-                torch.nn.utils.clip_grad_value_(model.parameters(), 3.0)
-                optimizer.step()
-        return loss_meter.avg, score_meter.avg
-
     def fit(
         self,
         dataset: DatasetH,
@@ -204,14 +204,22 @@ class QuantTransformer(Model):

         def _internal_test(ckp_epoch=None, results_dict=None):
             with torch.no_grad():
-                train_loss, train_score = self.train_or_test_epoch(
-                    train_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                shared_kwards = {
+                    "model": self.model,
+                    "loss_fn": self.loss_fn,
+                    "metric_fn": self.metric_fn,
+                    "is_train": False,
+                    "optimizer": None,
+                    "device": self.device,
+                }
+                train_loss, train_score = train_or_test_epoch(
+                    train_loader, **shared_kwards
                 )
-                valid_loss, valid_score = self.train_or_test_epoch(
-                    valid_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                valid_loss, valid_score = train_or_test_epoch(
+                    valid_loader, **shared_kwards
                 )
-                test_loss, test_score = self.train_or_test_epoch(
-                    test_loader, self.model, self.loss_fn, self.metric_fn, False, None
+                test_loss, test_score = train_or_test_epoch(
+                    test_loader, **shared_kwards
                 )
                 xstr = (
                     "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}".format(
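Note: bundling the fixed arguments into shared_kwards (the author's spelling) keeps the three evaluation calls consistent; only the loader varies. The same pattern in isolation (names reuse the sketch after the train_or_test_epoch definition above):

    eval_kwargs = dict(
        model=model, loss_fn=loss_fn, metric_fn=metric_fn,
        is_train=False, optimizer=None, device=device,
    )
    with torch.no_grad():
        valid_loss, valid_score = train_or_test_epoch(loader, **eval_kwargs)
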
@@ -255,13 +263,14 @@ class QuantTransformer(Model):
                     iepoch, self.opt_config["epochs"], best_epoch, best_score
                 )
             )
-            train_loss, train_score = self.train_or_test_epoch(
+            train_loss, train_score = train_or_test_epoch(
                 train_loader,
                 self.model,
                 self.loss_fn,
                 self.metric_fn,
                 True,
                 self.train_optimizer,
+                self.device,
             )
             self.logger.info(
                 "Training :: loss={:.6f}, score={:.6f}".format(train_loss, train_score)
@@ -307,7 +316,8 @@ class QuantTransformer(Model):
         self.logger.info("Reload the best parameter :: {:}".format(eval_str))

         if self.use_gpu:
-            torch.cuda.empty_cache()
+            with torch.cuda.device(self.device):
+                torch.cuda.empty_cache()
         self.fitted = True

     def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
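Note: this is the fix the commit message refers to. torch.cuda.empty_cache() only releases cached blocks on the *current* CUDA device, so on a multi-GPU machine the old call could empty the cache of cuda:0 while training ran elsewhere. The torch.cuda.device context manager pins the call to the intended device. A minimal sketch (the device index is illustrative):

    import torch

    if torch.cuda.is_available():
        device = torch.device("cuda:1")  # assume training happened on GPU 1
        with torch.cuda.device(device):
            torch.cuda.empty_cache()  # frees cached blocks on cuda:1, not cuda:0
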
@@ -30,11 +30,14 @@ class SuperLinear(SuperModule):
         self._out_features = out_features
         self._bias = bias
         # weights to be optimized
-        self._super_weight = torch.nn.Parameter(
-            torch.Tensor(self.out_features, self.in_features)
+        self.register_parameter(
+            "_super_weight",
+            torch.nn.Parameter(torch.Tensor(self.out_features, self.in_features)),
         )
         if self.bias:
-            self._super_bias = torch.nn.Parameter(torch.Tensor(self.out_features))
+            self.register_parameter(
+                "_super_bias", torch.nn.Parameter(torch.Tensor(self.out_features))
+            )
         else:
             self.register_parameter("_super_bias", None)
         self.reset_parameters()
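Note: assigning a torch.nn.Parameter to an attribute and calling register_parameter register the tensor in the module's parameter dict the same way; the explicit form lets every branch (real parameter or None) go through one API. A standalone sketch of the equivalence:

    import torch

    class Tiny(torch.nn.Module):
        def __init__(self, bias=False):
            super().__init__()
            self.weight = torch.nn.Parameter(torch.zeros(3))  # attribute style
            if bias:
                self.register_parameter("bias", torch.nn.Parameter(torch.zeros(3)))
            else:
                self.register_parameter("bias", None)  # registered as None; skipped by named_parameters()

    print(dict(Tiny().named_parameters()))  # {'weight': Parameter(...)}
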
@@ -25,8 +25,8 @@ class SuperLayerNorm1D(SuperModule):
         self._eps = eps
         self._elementwise_affine = elementwise_affine
         if self._elementwise_affine:
-            self.weight = nn.Parameter(torch.Tensor(self.in_dim))
-            self.bias = nn.Parameter(torch.Tensor(self.in_dim))
+            self.register_parameter("weight", nn.Parameter(torch.Tensor(self.in_dim)))
+            self.register_parameter("bias", nn.Parameter(torch.Tensor(self.in_dim)))
         else:
             self.register_parameter("weight", None)
             self.register_parameter("bias", None)
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf-all.sh 0 csi300 0
-# bash scripts/trade/tsf-all.sh 0 csi300 0.1
+# bash scripts/trade/tsf-all.sh 0 csi300 0_0
+# bash scripts/trade/tsf-all.sh 0 csi300 0.1_0
 # bash scripts/trade/tsf-all.sh 1 all
 #
 set -e
@@ -24,6 +24,6 @@ for channel in ${channels}
 do
   for depth in ${depths}
   do
-    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+    python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}
   done
 done
@@ -1,9 +1,9 @@
 #!/bin/bash
 #
-# bash scripts/trade/tsf.sh 0 csi300 3 0
-# bash scripts/trade/tsf.sh 0 csi300 3 0.1
-# bash scripts/trade/tsf.sh 1 csi100 3
-# bash scripts/trade/tsf.sh 1 all 3
+# bash scripts/trade/tsf.sh 0 csi300 3 0_0
+# bash scripts/trade/tsf.sh 0 csi300 3 0.1_0
+# bash scripts/trade/tsf.sh 1 csi100 3 0.2_0
+# bash scripts/trade/tsf.sh 1 all 3 0.1_0
 #
 set -e
 echo script name: $0
@@ -24,6 +24,6 @@ channels="6 12 24 32 48 64"
 for channel in ${channels}
 do

-  python exps/trading/baselines.py --alg TSF-${depth}x${channel}-d${drop} --gpu ${gpu} --market ${market}
+  python exps/trading/baselines.py --alg TSF-${depth}x${channel}-drop${drop} --gpu ${gpu} --market ${market}

done