Update tests for torch/cuda

2021-03-30 12:05:52 +00:00 · 2021-03-30 12:05:52 +00:00 · e5ec43e04a
commit e5ec43e04a
parent c2270fd153
12 changed files with 386 additions and 248 deletions
--- a/.github/workflows/basic_test.yml
+++ b/.github/workflows/basic_test.yml
@ -37,6 +37,7 @@ jobs:
          python -m black ./lib/trade_models -l 88 --check --diff --verbose
          python -m black ./lib/procedures -l 88 --check --diff --verbose
          python -m black ./lib/config_utils -l 88 --check --diff --verbose
+          python -m black ./lib/log_utils -l 88 --check --diff --verbose

      - name: Test Search Space
        run: |
--- a/.latent-data/qlib
+++ b/.latent-data/qlib
@ -1 +1 @@
-Subproject commit 968930e85f4958d16dfc2c5740c02f5c91745b97
+Subproject commit 70c84cbc77e52bea67e4528d6f79e2e462a4ffa6
--- a/exps/trading/baselines.py
+++ b/exps/trading/baselines.py
@ -141,26 +141,25 @@ def retrieve_configs():
    return alg2configs


-def main(xargs, config):
+def main(alg_name, market, config, times, save_dir, gpu):

-    pprint("Run {:}".format(xargs.alg))
-    config = update_market(config, xargs.market)
-    config = update_gpu(config, xargs.gpu)
+    pprint("Run {:}".format(alg_name))
+    config = update_market(config, market)
+    config = update_gpu(config, gpu)

    qlib.init(**config.get("qlib_init"))
    dataset_config = config.get("task").get("dataset")
    dataset = init_instance_by_config(dataset_config)
-    pprint("args: {:}".format(xargs))
    pprint(dataset_config)
    pprint(dataset)

-    for irun in range(xargs.times):
+    for irun in range(times):
        run_exp(
            config.get("task"),
            dataset,
-            xargs.alg,
-            "recorder-{:02d}-{:02d}".format(irun, xargs.times),
-            "{:}-{:}".format(xargs.save_dir, xargs.market),
+            alg_name,
+            "recorder-{:02d}-{:02d}".format(irun, times),
+            "{:}-{:}".format(save_dir, market),
        )


@ -203,6 +202,13 @@ if __name__ == "__main__":
    args = parser.parse_args()

    if len(args.alg) == 1:
-        main(args, alg2configs[args.alg[0]])
+        main(
+            args.alg[0],
+            args.market,
+            alg2configs[args.alg[0]],
+            args.times,
+            args.save_dir,
+            args.gpu,
+        )
    else:
        print("-")
--- a/lib/config_utils/__init__.py
+++ b/lib/config_utils/__init__.py
@ -3,6 +3,7 @@
 ##################################################
 # general config related functions
 from .config_utils import load_config, dict2config, configure2str
+
 # the args setting for different experiments
 from .basic_args import obtain_basic_args
 from .attention_args import obtain_attention_args
--- a/lib/log_utils/__init__.py
+++ b/lib/log_utils/__init__.py
@ -3,6 +3,14 @@
 ##################################################
 # every package does not rely on pytorch or tensorflow
 # I tried to list all dependency here: os, sys, time, numpy, (possibly) matplotlib
-from .logger       import Logger, PrintLogger
-from .meter        import AverageMeter
-from .time_utils   import time_for_file, time_string, time_string_short, time_print, convert_secs2time
+##################################################
+from .logger import Logger, PrintLogger
+from .meter import AverageMeter
+from .time_utils import (
+    time_for_file,
+    time_string,
+    time_string_short,
+    time_print,
+    convert_secs2time,
+)
+from .pickle_wrap import pickle_save, pickle_load
--- a/lib/log_utils/logger.py
+++ b/lib/log_utils/logger.py
@ -4,147 +4,168 @@
 from pathlib import Path
 import importlib, warnings
 import os, sys, time, numpy as np
-if sys.version_info.major == 2: # Python 2.x
-  from StringIO import StringIO as BIO
-else:                           # Python 3.x
-  from io import BytesIO as BIO

-if importlib.util.find_spec('tensorflow'):
-  import tensorflow as tf
+if sys.version_info.major == 2:  # Python 2.x
+    from StringIO import StringIO as BIO
+else:  # Python 3.x
+    from io import BytesIO as BIO
+
+if importlib.util.find_spec("tensorflow"):
+    import tensorflow as tf


 class PrintLogger(object):
+    def __init__(self):
+        """Create a summary writer logging to log_dir."""
+        self.name = "PrintLogger"

-  def __init__(self):
-    """Create a summary writer logging to log_dir."""
-    self.name = 'PrintLogger'
+    def log(self, string):
+        print(string)

-  def log(self, string):
-    print (string)
-
-  def close(self):
-    print ('-'*30 + ' close printer ' + '-'*30)
+    def close(self):
+        print("-" * 30 + " close printer " + "-" * 30)


 class Logger(object):
+    def __init__(self, log_dir, seed, create_model_dir=True, use_tf=False):
+        """Create a summary writer logging to log_dir."""
+        self.seed = int(seed)
+        self.log_dir = Path(log_dir)
+        self.model_dir = Path(log_dir) / "checkpoint"
+        self.log_dir.mkdir(parents=True, exist_ok=True)
+        if create_model_dir:
+            self.model_dir.mkdir(parents=True, exist_ok=True)
+        # self.meta_dir.mkdir(mode=0o775, parents=True, exist_ok=True)

-  def __init__(self, log_dir, seed, create_model_dir=True, use_tf=False):
-    """Create a summary writer logging to log_dir."""
-    self.seed      = int(seed)
-    self.log_dir   = Path(log_dir)
-    self.model_dir = Path(log_dir) / 'checkpoint'
-    self.log_dir.mkdir  (parents=True, exist_ok=True)
-    if create_model_dir:
-      self.model_dir.mkdir(parents=True, exist_ok=True)
-    #self.meta_dir.mkdir(mode=0o775, parents=True, exist_ok=True)
+        self.use_tf = bool(use_tf)
+        self.tensorboard_dir = self.log_dir / (
+            "tensorboard-{:}".format(time.strftime("%d-%h", time.gmtime(time.time())))
+        )
+        # self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h-at-%H:%M:%S', time.gmtime(time.time()) )))
+        self.logger_path = self.log_dir / "seed-{:}-T-{:}.log".format(
+            self.seed, time.strftime("%d-%h-at-%H-%M-%S", time.gmtime(time.time()))
+        )
+        self.logger_file = open(self.logger_path, "w")

-    self.use_tf  = bool(use_tf)
-    self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h', time.gmtime(time.time()) )))
-    #self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h-at-%H:%M:%S', time.gmtime(time.time()) )))
-    self.logger_path = self.log_dir / 'seed-{:}-T-{:}.log'.format(self.seed, time.strftime('%d-%h-at-%H-%M-%S', time.gmtime(time.time())))
-    self.logger_file = open(self.logger_path, 'w')
+        if self.use_tf:
+            self.tensorboard_dir.mkdir(mode=0o775, parents=True, exist_ok=True)
+            self.writer = tf.summary.FileWriter(str(self.tensorboard_dir))
+        else:
+            self.writer = None

-    if self.use_tf:
-      self.tensorboard_dir.mkdir(mode=0o775, parents=True, exist_ok=True)
-      self.writer = tf.summary.FileWriter(str(self.tensorboard_dir))
-    else:
-      self.writer = None
+    def __repr__(self):
+        return "{name}(dir={log_dir}, use-tf={use_tf}, writer={writer})".format(
+            name=self.__class__.__name__, **self.__dict__
+        )

-  def __repr__(self):
-    return ('{name}(dir={log_dir}, use-tf={use_tf}, writer={writer})'.format(name=self.__class__.__name__, **self.__dict__))
+    def path(self, mode):
+        valids = ("model", "best", "info", "log")
+        if mode == "model":
+            return self.model_dir / "seed-{:}-basic.pth".format(self.seed)
+        elif mode == "best":
+            return self.model_dir / "seed-{:}-best.pth".format(self.seed)
+        elif mode == "info":
+            return self.log_dir / "seed-{:}-last-info.pth".format(self.seed)
+        elif mode == "log":
+            return self.log_dir
+        else:
+            raise TypeError("Unknow mode = {:}, valid modes = {:}".format(mode, valids))

-  def path(self, mode):
-    valids = ('model', 'best', 'info', 'log')
-    if   mode == 'model': return self.model_dir / 'seed-{:}-basic.pth'.format(self.seed)
-    elif mode == 'best' : return self.model_dir / 'seed-{:}-best.pth'.format(self.seed)
-    elif mode == 'info' : return self.log_dir / 'seed-{:}-last-info.pth'.format(self.seed)
-    elif mode == 'log'  : return self.log_dir
-    else: raise TypeError('Unknow mode = {:}, valid modes = {:}'.format(mode, valids))
+    def extract_log(self):
+        return self.logger_file

-  def extract_log(self):
-    return self.logger_file
+    def close(self):
+        self.logger_file.close()
+        if self.writer is not None:
+            self.writer.close()

-  def close(self):
-    self.logger_file.close()
-    if self.writer is not None:
-      self.writer.close()
+    def log(self, string, save=True, stdout=False):
+        if stdout:
+            sys.stdout.write(string)
+            sys.stdout.flush()
+        else:
+            print(string)
+        if save:
+            self.logger_file.write("{:}\n".format(string))
+            self.logger_file.flush()

-  def log(self, string, save=True, stdout=False):
-    if stdout:
-      sys.stdout.write(string); sys.stdout.flush()
-    else:
-      print (string)
-    if save:
-      self.logger_file.write('{:}\n'.format(string))
-      self.logger_file.flush()
+    def scalar_summary(self, tags, values, step):
+        """Log a scalar variable."""
+        if not self.use_tf:
+            warnings.warn("Do set use-tensorflow installed but call scalar_summary")
+        else:
+            assert isinstance(tags, list) == isinstance(
+                values, list
+            ), "Type : {:} vs {:}".format(type(tags), type(values))
+            if not isinstance(tags, list):
+                tags, values = [tags], [values]
+            for tag, value in zip(tags, values):
+                summary = tf.Summary(
+                    value=[tf.Summary.Value(tag=tag, simple_value=value)]
+                )
+                self.writer.add_summary(summary, step)
+                self.writer.flush()

-  def scalar_summary(self, tags, values, step):
-    """Log a scalar variable."""
-    if not self.use_tf:
-      warnings.warn('Do set use-tensorflow installed but call scalar_summary')
-    else:
-      assert isinstance(tags, list) == isinstance(values, list), 'Type : {:} vs {:}'.format(type(tags), type(values))
-      if not isinstance(tags, list):
-        tags, values = [tags], [values]
-      for tag, value in zip(tags, values):
-        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+    def image_summary(self, tag, images, step):
+        """Log a list of images."""
+        import scipy
+
+        if not self.use_tf:
+            warnings.warn("Do set use-tensorflow installed but call scalar_summary")
+            return
+
+        img_summaries = []
+        for i, img in enumerate(images):
+            # Write the image to an in-memory buffer. BIO is the alias imported
+            # at module level (StringIO on Python 2, BytesIO on Python 3); the
+            # bare StringIO/BytesIO names are not bound by the imports visible
+            # in this file, so the old try/except could only raise NameError.
+            s = BIO()
+            scipy.misc.toimage(img).save(s, format="png")
+
+            # Create an Image object
+            img_sum = tf.Summary.Image(
+                encoded_image_string=s.getvalue(),
+                height=img.shape[0],
+                width=img.shape[1],
+            )
+            # Create a Summary value
+            img_summaries.append(
+                tf.Summary.Value(tag="{}/{}".format(tag, i), image=img_sum)
+            )
+
+        # Create and write Summary
+        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)
        self.writer.flush()

-  def image_summary(self, tag, images, step):
-    """Log a list of images."""
-    import scipy
-    if not self.use_tf:
-      warnings.warn('Do set use-tensorflow installed but call scalar_summary')
-      return
+    def histo_summary(self, tag, values, step, bins=1000):
+        """Log a histogram of the tensor of values."""
+        if not self.use_tf:
+            raise ValueError("Do not have tensorflow")
+        import tensorflow as tf

-    img_summaries = []
-    for i, img in enumerate(images):
-      # Write the image to a string
-      try:
-        s = StringIO()
-      except:
-        s = BytesIO()
-      scipy.misc.toimage(img).save(s, format="png")
+        # Create a histogram using numpy
+        counts, bin_edges = np.histogram(values, bins=bins)

-      # Create an Image object
-      img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
-                     height=img.shape[0],
-                     width=img.shape[1])
-      # Create a Summary value
-      img_summaries.append(tf.Summary.Value(tag='{}/{}'.format(tag, i), image=img_sum))
+        # Fill the fields of the histogram proto
+        hist = tf.HistogramProto()
+        hist.min = float(np.min(values))
+        hist.max = float(np.max(values))
+        hist.num = int(np.prod(values.shape))
+        hist.sum = float(np.sum(values))
+        hist.sum_squares = float(np.sum(values ** 2))

-    # Create and write Summary
-    summary = tf.Summary(value=img_summaries)
-    self.writer.add_summary(summary, step)
-    self.writer.flush()
+        # Drop the start of the first bin
+        bin_edges = bin_edges[1:]

-  def histo_summary(self, tag, values, step, bins=1000):
-    """Log a histogram of the tensor of values."""
-    if not self.use_tf: raise ValueError('Do not have tensorflow')
-    import tensorflow as tf
+        # Add bin edges and counts
+        for edge in bin_edges:
+            hist.bucket_limit.append(edge)
+        for c in counts:
+            hist.bucket.append(c)

-    # Create a histogram using numpy
-    counts, bin_edges = np.histogram(values, bins=bins)
-
-    # Fill the fields of the histogram proto
-    hist = tf.HistogramProto()
-    hist.min = float(np.min(values))
-    hist.max = float(np.max(values))
-    hist.num = int(np.prod(values.shape))
-    hist.sum = float(np.sum(values))
-    hist.sum_squares = float(np.sum(values**2))
-
-    # Drop the start of the first bin
-    bin_edges = bin_edges[1:]
-
-    # Add bin edges and counts
-    for edge in bin_edges:
-      hist.bucket_limit.append(edge)
-    for c in counts:
-      hist.bucket.append(c)
-
-    # Create and write Summary
-    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
-    self.writer.add_summary(summary, step)
-    self.writer.flush()
+        # Create and write Summary
+        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
+        self.writer.add_summary(summary, step)
+        self.writer.flush()
--- a/lib/log_utils/meter.py
+++ b/lib/log_utils/meter.py
@ -2,97 +2,119 @@ import numpy as np


 class AverageMeter(object):
-  """Computes and stores the average and current value"""    
-  def __init__(self):   
-    self.reset()
+    """Computes and stores the average and current value"""

-  def reset(self):
-    self.val   = 0.0
-    self.avg   = 0.0
-    self.sum   = 0.0
-    self.count = 0.0
+    def __init__(self):
+        self.reset()

-  def update(self, val, n=1): 
-    self.val = val    
-    self.sum += val * n     
-    self.count += n
-    self.avg = self.sum / self.count    
+    def reset(self):
+        self.val = 0.0
+        self.avg = 0.0
+        self.sum = 0.0
+        self.count = 0.0

-  def __repr__(self):
-    return ('{name}(val={val}, avg={avg}, count={count})'.format(name=self.__class__.__name__, **self.__dict__))
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    def __repr__(self):
+        return "{name}(val={val}, avg={avg}, count={count})".format(
+            name=self.__class__.__name__, **self.__dict__
+        )


 class RecorderMeter(object):
-  """Computes and stores the minimum loss value and its epoch index"""
-  def __init__(self, total_epoch):
-    self.reset(total_epoch)
+    """Computes and stores the minimum loss value and its epoch index"""

-  def reset(self, total_epoch):
-    assert total_epoch > 0, 'total_epoch should be greater than 0 vs {:}'.format(total_epoch)
-    self.total_epoch   = total_epoch
-    self.current_epoch = 0
-    self.epoch_losses  = np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
-    self.epoch_losses  = self.epoch_losses - 1
-    self.epoch_accuracy= np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
-    self.epoch_accuracy= self.epoch_accuracy
+    def __init__(self, total_epoch):
+        self.reset(total_epoch)

-  def update(self, idx, train_loss, train_acc, val_loss, val_acc):
-    assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
-    self.epoch_losses  [idx, 0] = train_loss
-    self.epoch_losses  [idx, 1] = val_loss
-    self.epoch_accuracy[idx, 0] = train_acc
-    self.epoch_accuracy[idx, 1] = val_acc
-    self.current_epoch = idx + 1
-    return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]
+    def reset(self, total_epoch):
+        assert total_epoch > 0, "total_epoch should be greater than 0 vs {:}".format(
+            total_epoch
+        )
+        self.total_epoch = total_epoch
+        self.current_epoch = 0
+        self.epoch_losses = np.zeros(
+            (self.total_epoch, 2), dtype=np.float32
+        )  # [epoch, train/val]
+        self.epoch_losses = self.epoch_losses - 1
+        self.epoch_accuracy = np.zeros(
+            (self.total_epoch, 2), dtype=np.float32
+        )  # [epoch, train/val]
+        self.epoch_accuracy = self.epoch_accuracy

-  def max_accuracy(self, istrain):
-    if self.current_epoch <= 0: return 0
-    if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max()
-    else:       return self.epoch_accuracy[:self.current_epoch, 1].max()
+    def update(self, idx, train_loss, train_acc, val_loss, val_acc):
+        assert (
+            idx >= 0 and idx < self.total_epoch
+        ), "total_epoch : {} , but update with the {} index".format(
+            self.total_epoch, idx
+        )
+        self.epoch_losses[idx, 0] = train_loss
+        self.epoch_losses[idx, 1] = val_loss
+        self.epoch_accuracy[idx, 0] = train_acc
+        self.epoch_accuracy[idx, 1] = val_acc
+        self.current_epoch = idx + 1
+        return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]

-  def plot_curve(self, save_path):
-    import matplotlib
-    matplotlib.use('agg')
-    import matplotlib.pyplot as plt
-    title = 'the accuracy/loss curve of train/val'
-    dpi = 100 
-    width, height = 1600, 1000
-    legend_fontsize = 10
-    figsize = width / float(dpi), height / float(dpi)
+    def max_accuracy(self, istrain):
+        if self.current_epoch <= 0:
+            return 0
+        if istrain:
+            return self.epoch_accuracy[: self.current_epoch, 0].max()
+        else:
+            return self.epoch_accuracy[: self.current_epoch, 1].max()

-    fig = plt.figure(figsize=figsize)
-    x_axis = np.array([i for i in range(self.total_epoch)]) # epochs
-    y_axis = np.zeros(self.total_epoch)
+    def plot_curve(self, save_path):
+        import matplotlib

-    plt.xlim(0, self.total_epoch)
-    plt.ylim(0, 100)
-    interval_y = 5
-    interval_x = 5
-    plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
-    plt.yticks(np.arange(0, 100 + interval_y, interval_y))
-    plt.grid()
-    plt.title(title, fontsize=20)
-    plt.xlabel('the training epoch', fontsize=16)
-    plt.ylabel('accuracy', fontsize=16)
+        matplotlib.use("agg")
+        import matplotlib.pyplot as plt

-    y_axis[:] = self.epoch_accuracy[:, 0]
-    plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2)
-    plt.legend(loc=4, fontsize=legend_fontsize)
+        title = "the accuracy/loss curve of train/val"
+        dpi = 100
+        width, height = 1600, 1000
+        legend_fontsize = 10
+        figsize = width / float(dpi), height / float(dpi)

-    y_axis[:] = self.epoch_accuracy[:, 1]
-    plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2)
-    plt.legend(loc=4, fontsize=legend_fontsize)
+        fig = plt.figure(figsize=figsize)
+        x_axis = np.array([i for i in range(self.total_epoch)])  # epochs
+        y_axis = np.zeros(self.total_epoch)

+        plt.xlim(0, self.total_epoch)
+        plt.ylim(0, 100)
+        interval_y = 5
+        interval_x = 5
+        plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
+        plt.yticks(np.arange(0, 100 + interval_y, interval_y))
+        plt.grid()
+        plt.title(title, fontsize=20)
+        plt.xlabel("the training epoch", fontsize=16)
+        plt.ylabel("accuracy", fontsize=16)

-    y_axis[:] = self.epoch_losses[:, 0]
-    plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2)
-    plt.legend(loc=4, fontsize=legend_fontsize)
+        y_axis[:] = self.epoch_accuracy[:, 0]
+        plt.plot(x_axis, y_axis, color="g", linestyle="-", label="train-accuracy", lw=2)
+        plt.legend(loc=4, fontsize=legend_fontsize)

-    y_axis[:] = self.epoch_losses[:, 1]
-    plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2)
-    plt.legend(loc=4, fontsize=legend_fontsize)
+        y_axis[:] = self.epoch_accuracy[:, 1]
+        plt.plot(x_axis, y_axis, color="y", linestyle="-", label="valid-accuracy", lw=2)
+        plt.legend(loc=4, fontsize=legend_fontsize)

-    if save_path is not None:
-      fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
-      print ('---- save figure {} into {}'.format(title, save_path))
-    plt.close(fig)
+        y_axis[:] = self.epoch_losses[:, 0]
+        plt.plot(
+            x_axis, y_axis * 50, color="g", linestyle=":", label="train-loss-x50", lw=2
+        )
+        plt.legend(loc=4, fontsize=legend_fontsize)
+
+        y_axis[:] = self.epoch_losses[:, 1]
+        plt.plot(
+            x_axis, y_axis * 50, color="y", linestyle=":", label="valid-loss-x50", lw=2
+        )
+        plt.legend(loc=4, fontsize=legend_fontsize)
+
+        if save_path is not None:
+            fig.savefig(save_path, dpi=dpi, bbox_inches="tight")
+            print("---- save figure {} into {}".format(title, save_path))
+        plt.close(fig)
--- a/lib/log_utils/pickle_wrap.py
+++ b/lib/log_utils/pickle_wrap.py
@ -0,0 +1,21 @@
+#####################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
+#####################################################
+import pickle
+from pathlib import Path
+
+
+def pickle_save(obj, path):
+    file_path = Path(path)
+    file_dir = file_path.parent
+    file_dir.mkdir(parents=True, exist_ok=True)
+    with file_path.open("wb") as f:
+        pickle.dump(obj, f)
+
+
+def pickle_load(path):
+    if not Path(path).exists():
+        raise ValueError("{:} does not exists".format(path))
+    with Path(path).open("rb") as f:
+        data = pickle.load(f)
+    return data
--- a/lib/log_utils/time_utils.py
+++ b/lib/log_utils/time_utils.py
@ -4,39 +4,46 @@
 import time, sys
 import numpy as np

+
 def time_for_file():
-  ISOTIMEFORMAT='%d-%h-at-%H-%M-%S'
-  return '{:}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+    ISOTIMEFORMAT = "%d-%h-at-%H-%M-%S"
+    return "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
+

 def time_string():
-  ISOTIMEFORMAT='%Y-%m-%d %X'
-  string = '[{:}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
-  return string
+    ISOTIMEFORMAT = "%Y-%m-%d %X"
+    string = "[{:}]".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
+    return string
+

 def time_string_short():
-  ISOTIMEFORMAT='%Y%m%d'
-  string = '{:}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
-  return string
+    ISOTIMEFORMAT = "%Y%m%d"
+    string = "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
+    return string
+

 def time_print(string, is_print=True):
-  if (is_print):
-    print('{} : {}'.format(time_string(), string))
+    if is_print:
+        print("{} : {}".format(time_string(), string))
+

 def convert_secs2time(epoch_time, return_str=False):
-  need_hour = int(epoch_time / 3600)
-  need_mins = int((epoch_time - 3600*need_hour) / 60)  
-  need_secs = int(epoch_time - 3600*need_hour - 60*need_mins)
-  if return_str:
-    str = '[{:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
-    return str
-  else:
-    return need_hour, need_mins, need_secs
+    need_hour = int(epoch_time / 3600)
+    need_mins = int((epoch_time - 3600 * need_hour) / 60)
+    need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins)
+    if return_str:
+        str = "[{:02d}:{:02d}:{:02d}]".format(need_hour, need_mins, need_secs)
+        return str
+    else:
+        return need_hour, need_mins, need_secs
+

 def print_log(print_string, log):
-  #if isinstance(log, Logger): log.log('{:}'.format(print_string))
-  if hasattr(log, 'log'): log.log('{:}'.format(print_string))
-  else:
-    print("{:}".format(print_string))
-    if log is not None:
-      log.write('{:}\n'.format(print_string))
-      log.flush()
+    # if isinstance(log, Logger): log.log('{:}'.format(print_string))
+    if hasattr(log, "log"):
+        log.log("{:}".format(print_string))
+    else:
+        print("{:}".format(print_string))
+        if log is not None:
+            log.write("{:}\n".format(print_string))
+            log.flush()
--- a/lib/utils/flop_benchmark.py
+++ b/lib/utils/flop_benchmark.py
@ -9,15 +9,19 @@ def count_parameters_in_MB(model):

 def count_parameters(model_or_parameters, unit="mb"):
    if isinstance(model_or_parameters, nn.Module):
-        counts = np.sum(np.prod(v.size()) for v in model_or_parameters.parameters())
+        counts = sum(np.prod(v.size()) for v in model_or_parameters.parameters())
+    elif isinstance(model_or_parameters, nn.Parameter):
+        counts = model_or_parameters.numel()
+    elif isinstance(model_or_parameters, (list, tuple)):
+        counts = sum(count_parameters(x, None) for x in model_or_parameters)  # unit=None -> raw, unscaled counts
    else:
-        counts = np.sum(np.prod(v.size()) for v in model_or_parameters)
-    if unit.lower() == "mb":
-        counts /= 1e6
-    elif unit.lower() == "kb":
-        counts /= 1e3
-    elif unit.lower() == "gb":
-        counts /= 1e9
+        counts = sum(np.prod(v.size()) for v in model_or_parameters)
+    if unit is not None and unit.lower() in ("kb", "k"):  # guard: None has no .lower()
+        counts /= 2 ** 10  # changed from 1e3 to 2^10
+    elif unit is not None and unit.lower() in ("mb", "m"):
+        counts /= 2 ** 20  # changed from 1e6 to 2^20
+    elif unit is not None and unit.lower() in ("gb", "g"):
+        counts /= 2 ** 30  # changed from 1e9 to 2^30
    elif unit is not None:
        raise ValueError("Unknow unit: {:}".format(unit))
    return counts
--- a/tests/test_torch.sh
+++ b/tests/test_torch.sh
@ -0,0 +1,4 @@
+# bash ./tests/test_torch.sh
+
+pytest ./tests/test_torch_gpu_bugs.py::test_create -s
+CUDA_VISIBLE_DEVICES="" pytest ./tests/test_torch_gpu_bugs.py::test_load -s
--- a/tests/test_torch_gpu_bugs.py
+++ b/tests/test_torch_gpu_bugs.py
@ -0,0 +1,43 @@
+#####################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 #
+#####################################################
+# pytest ./tests/test_torch_gpu_bugs.py::test_create
+#
+# CUDA_VISIBLE_DEVICES="" pytest ./tests/test_torch_gpu_bugs.py::test_load
+#####################################################
+import os, sys, time, torch
+import pickle
+import tempfile
+from pathlib import Path
+
+lib_dir = (Path(__file__).parent / ".." / "lib").resolve()
+print("library path: {:}".format(lib_dir))
+if str(lib_dir) not in sys.path:
+    sys.path.insert(0, str(lib_dir))
+
+from trade_models.quant_transformer import QuantTransformer
+
+
+def test_create():
+    """Test the basic quant-model."""
+    if not torch.cuda.is_available():
+        return
+    quant_model = QuantTransformer(GPU=0)
+    temp_dir = lib_dir / ".." / "tests" / ".pytest_cache"
+    temp_dir.mkdir(parents=True, exist_ok=True)
+    temp_file = temp_dir / "quant-model.pkl"
+    with temp_file.open("wb") as f:
+        # quant_model.to(None)
+        quant_model.to("cpu")
+        # del quant_model.model
+        # del quant_model.train_optimizer
+        pickle.dump(quant_model, f)
+    print("save into {:}".format(temp_file))
+
+
+def test_load():
+    temp_file = lib_dir / ".." / "tests" / ".pytest_cache" / "quant-model.pkl"
+    with temp_file.open("rb") as f:
+        model = pickle.load(f)
+        print(model.model)
+        print(model.train_optimizer)