diff --git a/.github/workflows/basic_test.yml b/.github/workflows/basic_test.yml index 57751b8..5db1a13 100644 --- a/.github/workflows/basic_test.yml +++ b/.github/workflows/basic_test.yml @@ -37,6 +37,7 @@ jobs: python -m black ./lib/trade_models -l 88 --check --diff --verbose python -m black ./lib/procedures -l 88 --check --diff --verbose python -m black ./lib/config_utils -l 88 --check --diff --verbose + python -m black ./lib/log_utils -l 88 --check --diff --verbose - name: Test Search Space run: | diff --git a/.latent-data/qlib b/.latent-data/qlib index 968930e..70c84cb 160000 --- a/.latent-data/qlib +++ b/.latent-data/qlib @@ -1 +1 @@ -Subproject commit 968930e85f4958d16dfc2c5740c02f5c91745b97 +Subproject commit 70c84cbc77e52bea67e4528d6f79e2e462a4ffa6 diff --git a/exps/trading/baselines.py b/exps/trading/baselines.py index 59adb5d..d33db01 100644 --- a/exps/trading/baselines.py +++ b/exps/trading/baselines.py @@ -141,26 +141,25 @@ def retrieve_configs(): return alg2configs -def main(xargs, config): +def main(alg_name, market, config, times, save_dir, gpu): - pprint("Run {:}".format(xargs.alg)) - config = update_market(config, xargs.market) - config = update_gpu(config, xargs.gpu) + pprint("Run {:}".format(alg_name)) + config = update_market(config, market) + config = update_gpu(config, gpu) qlib.init(**config.get("qlib_init")) dataset_config = config.get("task").get("dataset") dataset = init_instance_by_config(dataset_config) - pprint("args: {:}".format(xargs)) pprint(dataset_config) pprint(dataset) - for irun in range(xargs.times): + for irun in range(times): run_exp( config.get("task"), dataset, - xargs.alg, - "recorder-{:02d}-{:02d}".format(irun, xargs.times), - "{:}-{:}".format(xargs.save_dir, xargs.market), + alg_name, + "recorder-{:02d}-{:02d}".format(irun, times), + "{:}-{:}".format(save_dir, market), ) @@ -203,6 +202,13 @@ if __name__ == "__main__": args = parser.parse_args() if len(args.alg) == 1: - main(args, alg2configs[args.alg[0]]) + main( + args.alg[0], + args.market, + alg2configs[args.alg[0]], + args.times, + args.save_dir, + args.gpu, + ) else: print("-") diff --git a/lib/config_utils/__init__.py b/lib/config_utils/__init__.py index 85a162d..2ee6bae 100644 --- a/lib/config_utils/__init__.py +++ b/lib/config_utils/__init__.py @@ -3,6 +3,7 @@ ################################################## # general config related functions from .config_utils import load_config, dict2config, configure2str + # the args setting for different experiments from .basic_args import obtain_basic_args from .attention_args import obtain_attention_args diff --git a/lib/log_utils/__init__.py b/lib/log_utils/__init__.py index c491293..4c9f165 100644 --- a/lib/log_utils/__init__.py +++ b/lib/log_utils/__init__.py @@ -3,6 +3,14 @@ ################################################## # every package does not rely on pytorch or tensorflow # I tried to list all dependency here: os, sys, time, numpy, (possibly) matplotlib -from .logger import Logger, PrintLogger -from .meter import AverageMeter -from .time_utils import time_for_file, time_string, time_string_short, time_print, convert_secs2time +################################################## +from .logger import Logger, PrintLogger +from .meter import AverageMeter +from .time_utils import ( + time_for_file, + time_string, + time_string_short, + time_print, + convert_secs2time, +) +from .pickle_wrap import pickle_save, pickle_load diff --git a/lib/log_utils/logger.py b/lib/log_utils/logger.py index e60c78f..c95573f 100644 --- a/lib/log_utils/logger.py +++ b/lib/log_utils/logger.py @@ -4,147 +4,168 @@ from pathlib import Path import importlib, warnings import os, sys, time, numpy as np -if sys.version_info.major == 2: # Python 2.x - from StringIO import StringIO as BIO -else: # Python 3.x - from io import BytesIO as BIO -if importlib.util.find_spec('tensorflow'): - import tensorflow as tf +if sys.version_info.major == 2: # Python 2.x + from StringIO import StringIO as BIO +else: # Python 3.x + from io import BytesIO as BIO + +if importlib.util.find_spec("tensorflow"): + import tensorflow as tf class PrintLogger(object): - - def __init__(self): - """Create a summary writer logging to log_dir.""" - self.name = 'PrintLogger' + def __init__(self): + """Create a summary writer logging to log_dir.""" + self.name = "PrintLogger" - def log(self, string): - print (string) + def log(self, string): + print(string) - def close(self): - print ('-'*30 + ' close printer ' + '-'*30) + def close(self): + print("-" * 30 + " close printer " + "-" * 30) class Logger(object): - - def __init__(self, log_dir, seed, create_model_dir=True, use_tf=False): - """Create a summary writer logging to log_dir.""" - self.seed = int(seed) - self.log_dir = Path(log_dir) - self.model_dir = Path(log_dir) / 'checkpoint' - self.log_dir.mkdir (parents=True, exist_ok=True) - if create_model_dir: - self.model_dir.mkdir(parents=True, exist_ok=True) - #self.meta_dir.mkdir(mode=0o775, parents=True, exist_ok=True) + def __init__(self, log_dir, seed, create_model_dir=True, use_tf=False): + """Create a summary writer logging to log_dir.""" + self.seed = int(seed) + self.log_dir = Path(log_dir) + self.model_dir = Path(log_dir) / "checkpoint" + self.log_dir.mkdir(parents=True, exist_ok=True) + if create_model_dir: + self.model_dir.mkdir(parents=True, exist_ok=True) + # self.meta_dir.mkdir(mode=0o775, parents=True, exist_ok=True) - self.use_tf = bool(use_tf) - self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h', time.gmtime(time.time()) ))) - #self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h-at-%H:%M:%S', time.gmtime(time.time()) ))) - self.logger_path = self.log_dir / 'seed-{:}-T-{:}.log'.format(self.seed, time.strftime('%d-%h-at-%H-%M-%S', time.gmtime(time.time()))) - self.logger_file = open(self.logger_path, 'w') + self.use_tf = bool(use_tf) + self.tensorboard_dir = self.log_dir / ( + "tensorboard-{:}".format(time.strftime("%d-%h", time.gmtime(time.time()))) + ) + # self.tensorboard_dir = self.log_dir / ('tensorboard-{:}'.format(time.strftime( '%d-%h-at-%H:%M:%S', time.gmtime(time.time()) ))) + self.logger_path = self.log_dir / "seed-{:}-T-{:}.log".format( + self.seed, time.strftime("%d-%h-at-%H-%M-%S", time.gmtime(time.time())) + ) + self.logger_file = open(self.logger_path, "w") - if self.use_tf: - self.tensorboard_dir.mkdir(mode=0o775, parents=True, exist_ok=True) - self.writer = tf.summary.FileWriter(str(self.tensorboard_dir)) - else: - self.writer = None + if self.use_tf: + self.tensorboard_dir.mkdir(mode=0o775, parents=True, exist_ok=True) + self.writer = tf.summary.FileWriter(str(self.tensorboard_dir)) + else: + self.writer = None - def __repr__(self): - return ('{name}(dir={log_dir}, use-tf={use_tf}, writer={writer})'.format(name=self.__class__.__name__, **self.__dict__)) + def __repr__(self): + return "{name}(dir={log_dir}, use-tf={use_tf}, writer={writer})".format( + name=self.__class__.__name__, **self.__dict__ + ) - def path(self, mode): - valids = ('model', 'best', 'info', 'log') - if mode == 'model': return self.model_dir / 'seed-{:}-basic.pth'.format(self.seed) - elif mode == 'best' : return self.model_dir / 'seed-{:}-best.pth'.format(self.seed) - elif mode == 'info' : return self.log_dir / 'seed-{:}-last-info.pth'.format(self.seed) - elif mode == 'log' : return self.log_dir - else: raise TypeError('Unknow mode = {:}, valid modes = {:}'.format(mode, valids)) + def path(self, mode): + valids = ("model", "best", "info", "log") + if mode == "model": + return self.model_dir / "seed-{:}-basic.pth".format(self.seed) + elif mode == "best": + return self.model_dir / "seed-{:}-best.pth".format(self.seed) + elif mode == "info": + return self.log_dir / "seed-{:}-last-info.pth".format(self.seed) + elif mode == "log": + return self.log_dir + else: + raise TypeError("Unknow mode = {:}, valid modes = {:}".format(mode, valids)) - def extract_log(self): - return self.logger_file + def extract_log(self): + return self.logger_file - def close(self): - self.logger_file.close() - if self.writer is not None: - self.writer.close() + def close(self): + self.logger_file.close() + if self.writer is not None: + self.writer.close() - def log(self, string, save=True, stdout=False): - if stdout: - sys.stdout.write(string); sys.stdout.flush() - else: - print (string) - if save: - self.logger_file.write('{:}\n'.format(string)) - self.logger_file.flush() + def log(self, string, save=True, stdout=False): + if stdout: + sys.stdout.write(string) + sys.stdout.flush() + else: + print(string) + if save: + self.logger_file.write("{:}\n".format(string)) + self.logger_file.flush() - def scalar_summary(self, tags, values, step): - """Log a scalar variable.""" - if not self.use_tf: - warnings.warn('Do set use-tensorflow installed but call scalar_summary') - else: - assert isinstance(tags, list) == isinstance(values, list), 'Type : {:} vs {:}'.format(type(tags), type(values)) - if not isinstance(tags, list): - tags, values = [tags], [values] - for tag, value in zip(tags, values): - summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) + def scalar_summary(self, tags, values, step): + """Log a scalar variable.""" + if not self.use_tf: + warnings.warn("Do set use-tensorflow installed but call scalar_summary") + else: + assert isinstance(tags, list) == isinstance( + values, list + ), "Type : {:} vs {:}".format(type(tags), type(values)) + if not isinstance(tags, list): + tags, values = [tags], [values] + for tag, value in zip(tags, values): + summary = tf.Summary( + value=[tf.Summary.Value(tag=tag, simple_value=value)] + ) + self.writer.add_summary(summary, step) + self.writer.flush() + + def image_summary(self, tag, images, step): + """Log a list of images.""" + import scipy + + if not self.use_tf: + warnings.warn("Do set use-tensorflow installed but call scalar_summary") + return + + img_summaries = [] + for i, img in enumerate(images): + # Write the image to a string + try: + s = StringIO() + except: + s = BytesIO() + scipy.misc.toimage(img).save(s, format="png") + + # Create an Image object + img_sum = tf.Summary.Image( + encoded_image_string=s.getvalue(), + height=img.shape[0], + width=img.shape[1], + ) + # Create a Summary value + img_summaries.append( + tf.Summary.Value(tag="{}/{}".format(tag, i), image=img_sum) + ) + + # Create and write Summary + summary = tf.Summary(value=img_summaries) self.writer.add_summary(summary, step) self.writer.flush() - def image_summary(self, tag, images, step): - """Log a list of images.""" - import scipy - if not self.use_tf: - warnings.warn('Do set use-tensorflow installed but call scalar_summary') - return + def histo_summary(self, tag, values, step, bins=1000): + """Log a histogram of the tensor of values.""" + if not self.use_tf: + raise ValueError("Do not have tensorflow") + import tensorflow as tf - img_summaries = [] - for i, img in enumerate(images): - # Write the image to a string - try: - s = StringIO() - except: - s = BytesIO() - scipy.misc.toimage(img).save(s, format="png") + # Create a histogram using numpy + counts, bin_edges = np.histogram(values, bins=bins) - # Create an Image object - img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), - height=img.shape[0], - width=img.shape[1]) - # Create a Summary value - img_summaries.append(tf.Summary.Value(tag='{}/{}'.format(tag, i), image=img_sum)) + # Fill the fields of the histogram proto + hist = tf.HistogramProto() + hist.min = float(np.min(values)) + hist.max = float(np.max(values)) + hist.num = int(np.prod(values.shape)) + hist.sum = float(np.sum(values)) + hist.sum_squares = float(np.sum(values ** 2)) - # Create and write Summary - summary = tf.Summary(value=img_summaries) - self.writer.add_summary(summary, step) - self.writer.flush() - - def histo_summary(self, tag, values, step, bins=1000): - """Log a histogram of the tensor of values.""" - if not self.use_tf: raise ValueError('Do not have tensorflow') - import tensorflow as tf + # Drop the start of the first bin + bin_edges = bin_edges[1:] - # Create a histogram using numpy - counts, bin_edges = np.histogram(values, bins=bins) + # Add bin edges and counts + for edge in bin_edges: + hist.bucket_limit.append(edge) + for c in counts: + hist.bucket.append(c) - # Fill the fields of the histogram proto - hist = tf.HistogramProto() - hist.min = float(np.min(values)) - hist.max = float(np.max(values)) - hist.num = int(np.prod(values.shape)) - hist.sum = float(np.sum(values)) - hist.sum_squares = float(np.sum(values**2)) - - # Drop the start of the first bin - bin_edges = bin_edges[1:] - - # Add bin edges and counts - for edge in bin_edges: - hist.bucket_limit.append(edge) - for c in counts: - hist.bucket.append(c) - - # Create and write Summary - summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) - self.writer.add_summary(summary, step) - self.writer.flush() + # Create and write Summary + summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) + self.writer.add_summary(summary, step) + self.writer.flush() diff --git a/lib/log_utils/meter.py b/lib/log_utils/meter.py index cbb9dd1..2bbab98 100644 --- a/lib/log_utils/meter.py +++ b/lib/log_utils/meter.py @@ -1,98 +1,120 @@ import numpy as np -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self): - self.reset() - - def reset(self): - self.val = 0.0 - self.avg = 0.0 - self.sum = 0.0 - self.count = 0.0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count +class AverageMeter(object): + """Computes and stores the average and current value""" - def __repr__(self): - return ('{name}(val={val}, avg={avg}, count={count})'.format(name=self.__class__.__name__, **self.__dict__)) + def __init__(self): + self.reset() + + def reset(self): + self.val = 0.0 + self.avg = 0.0 + self.sum = 0.0 + self.count = 0.0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __repr__(self): + return "{name}(val={val}, avg={avg}, count={count})".format( + name=self.__class__.__name__, **self.__dict__ + ) class RecorderMeter(object): - """Computes and stores the minimum loss value and its epoch index""" - def __init__(self, total_epoch): - self.reset(total_epoch) + """Computes and stores the minimum loss value and its epoch index""" - def reset(self, total_epoch): - assert total_epoch > 0, 'total_epoch should be greater than 0 vs {:}'.format(total_epoch) - self.total_epoch = total_epoch - self.current_epoch = 0 - self.epoch_losses = np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val] - self.epoch_losses = self.epoch_losses - 1 - self.epoch_accuracy= np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val] - self.epoch_accuracy= self.epoch_accuracy + def __init__(self, total_epoch): + self.reset(total_epoch) - def update(self, idx, train_loss, train_acc, val_loss, val_acc): - assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx) - self.epoch_losses [idx, 0] = train_loss - self.epoch_losses [idx, 1] = val_loss - self.epoch_accuracy[idx, 0] = train_acc - self.epoch_accuracy[idx, 1] = val_acc - self.current_epoch = idx + 1 - return self.max_accuracy(False) == self.epoch_accuracy[idx, 1] + def reset(self, total_epoch): + assert total_epoch > 0, "total_epoch should be greater than 0 vs {:}".format( + total_epoch + ) + self.total_epoch = total_epoch + self.current_epoch = 0 + self.epoch_losses = np.zeros( + (self.total_epoch, 2), dtype=np.float32 + ) # [epoch, train/val] + self.epoch_losses = self.epoch_losses - 1 + self.epoch_accuracy = np.zeros( + (self.total_epoch, 2), dtype=np.float32 + ) # [epoch, train/val] + self.epoch_accuracy = self.epoch_accuracy - def max_accuracy(self, istrain): - if self.current_epoch <= 0: return 0 - if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max() - else: return self.epoch_accuracy[:self.current_epoch, 1].max() + def update(self, idx, train_loss, train_acc, val_loss, val_acc): + assert ( + idx >= 0 and idx < self.total_epoch + ), "total_epoch : {} , but update with the {} index".format( + self.total_epoch, idx + ) + self.epoch_losses[idx, 0] = train_loss + self.epoch_losses[idx, 1] = val_loss + self.epoch_accuracy[idx, 0] = train_acc + self.epoch_accuracy[idx, 1] = val_acc + self.current_epoch = idx + 1 + return self.max_accuracy(False) == self.epoch_accuracy[idx, 1] - def plot_curve(self, save_path): - import matplotlib - matplotlib.use('agg') - import matplotlib.pyplot as plt - title = 'the accuracy/loss curve of train/val' - dpi = 100 - width, height = 1600, 1000 - legend_fontsize = 10 - figsize = width / float(dpi), height / float(dpi) + def max_accuracy(self, istrain): + if self.current_epoch <= 0: + return 0 + if istrain: + return self.epoch_accuracy[: self.current_epoch, 0].max() + else: + return self.epoch_accuracy[: self.current_epoch, 1].max() - fig = plt.figure(figsize=figsize) - x_axis = np.array([i for i in range(self.total_epoch)]) # epochs - y_axis = np.zeros(self.total_epoch) + def plot_curve(self, save_path): + import matplotlib - plt.xlim(0, self.total_epoch) - plt.ylim(0, 100) - interval_y = 5 - interval_x = 5 - plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x)) - plt.yticks(np.arange(0, 100 + interval_y, interval_y)) - plt.grid() - plt.title(title, fontsize=20) - plt.xlabel('the training epoch', fontsize=16) - plt.ylabel('accuracy', fontsize=16) - - y_axis[:] = self.epoch_accuracy[:, 0] - plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2) - plt.legend(loc=4, fontsize=legend_fontsize) + matplotlib.use("agg") + import matplotlib.pyplot as plt - y_axis[:] = self.epoch_accuracy[:, 1] - plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2) - plt.legend(loc=4, fontsize=legend_fontsize) + title = "the accuracy/loss curve of train/val" + dpi = 100 + width, height = 1600, 1000 + legend_fontsize = 10 + figsize = width / float(dpi), height / float(dpi) - - y_axis[:] = self.epoch_losses[:, 0] - plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2) - plt.legend(loc=4, fontsize=legend_fontsize) + fig = plt.figure(figsize=figsize) + x_axis = np.array([i for i in range(self.total_epoch)]) # epochs + y_axis = np.zeros(self.total_epoch) - y_axis[:] = self.epoch_losses[:, 1] - plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2) - plt.legend(loc=4, fontsize=legend_fontsize) + plt.xlim(0, self.total_epoch) + plt.ylim(0, 100) + interval_y = 5 + interval_x = 5 + plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x)) + plt.yticks(np.arange(0, 100 + interval_y, interval_y)) + plt.grid() + plt.title(title, fontsize=20) + plt.xlabel("the training epoch", fontsize=16) + plt.ylabel("accuracy", fontsize=16) - if save_path is not None: - fig.savefig(save_path, dpi=dpi, bbox_inches='tight') - print ('---- save figure {} into {}'.format(title, save_path)) - plt.close(fig) + y_axis[:] = self.epoch_accuracy[:, 0] + plt.plot(x_axis, y_axis, color="g", linestyle="-", label="train-accuracy", lw=2) + plt.legend(loc=4, fontsize=legend_fontsize) + + y_axis[:] = self.epoch_accuracy[:, 1] + plt.plot(x_axis, y_axis, color="y", linestyle="-", label="valid-accuracy", lw=2) + plt.legend(loc=4, fontsize=legend_fontsize) + + y_axis[:] = self.epoch_losses[:, 0] + plt.plot( + x_axis, y_axis * 50, color="g", linestyle=":", label="train-loss-x50", lw=2 + ) + plt.legend(loc=4, fontsize=legend_fontsize) + + y_axis[:] = self.epoch_losses[:, 1] + plt.plot( + x_axis, y_axis * 50, color="y", linestyle=":", label="valid-loss-x50", lw=2 + ) + plt.legend(loc=4, fontsize=legend_fontsize) + + if save_path is not None: + fig.savefig(save_path, dpi=dpi, bbox_inches="tight") + print("---- save figure {} into {}".format(title, save_path)) + plt.close(fig) diff --git a/lib/log_utils/pickle_wrap.py b/lib/log_utils/pickle_wrap.py new file mode 100644 index 0000000..e1f00a4 --- /dev/null +++ b/lib/log_utils/pickle_wrap.py @@ -0,0 +1,21 @@ +##################################################### +# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # +##################################################### +import pickle +from pathlib import Path + + +def pickle_save(obj, path): + file_path = Path(path) + file_dir = file_path.parent + file_dir.mkdir(parents=True, exist_ok=True) + with file_path.open("wb") as f: + pickle.dump(obj, f) + + +def pickle_load(path): + if not Path(path).exists(): + raise ValueError("{:} does not exists".format(path)) + with Path(path).open("rb") as f: + data = pickle.load(f) + return data diff --git a/lib/log_utils/time_utils.py b/lib/log_utils/time_utils.py index 4a0f78e..dc1ff29 100644 --- a/lib/log_utils/time_utils.py +++ b/lib/log_utils/time_utils.py @@ -4,39 +4,46 @@ import time, sys import numpy as np + def time_for_file(): - ISOTIMEFORMAT='%d-%h-at-%H-%M-%S' - return '{:}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) + ISOTIMEFORMAT = "%d-%h-at-%H-%M-%S" + return "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time()))) + def time_string(): - ISOTIMEFORMAT='%Y-%m-%d %X' - string = '[{:}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) - return string + ISOTIMEFORMAT = "%Y-%m-%d %X" + string = "[{:}]".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time()))) + return string + def time_string_short(): - ISOTIMEFORMAT='%Y%m%d' - string = '{:}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) - return string + ISOTIMEFORMAT = "%Y%m%d" + string = "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time()))) + return string + def time_print(string, is_print=True): - if (is_print): - print('{} : {}'.format(time_string(), string)) + if is_print: + print("{} : {}".format(time_string(), string)) + + +def convert_secs2time(epoch_time, return_str=False): + need_hour = int(epoch_time / 3600) + need_mins = int((epoch_time - 3600 * need_hour) / 60) + need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins) + if return_str: + str = "[{:02d}:{:02d}:{:02d}]".format(need_hour, need_mins, need_secs) + return str + else: + return need_hour, need_mins, need_secs -def convert_secs2time(epoch_time, return_str=False): - need_hour = int(epoch_time / 3600) - need_mins = int((epoch_time - 3600*need_hour) / 60) - need_secs = int(epoch_time - 3600*need_hour - 60*need_mins) - if return_str: - str = '[{:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs) - return str - else: - return need_hour, need_mins, need_secs def print_log(print_string, log): - #if isinstance(log, Logger): log.log('{:}'.format(print_string)) - if hasattr(log, 'log'): log.log('{:}'.format(print_string)) - else: - print("{:}".format(print_string)) - if log is not None: - log.write('{:}\n'.format(print_string)) - log.flush() + # if isinstance(log, Logger): log.log('{:}'.format(print_string)) + if hasattr(log, "log"): + log.log("{:}".format(print_string)) + else: + print("{:}".format(print_string)) + if log is not None: + log.write("{:}\n".format(print_string)) + log.flush() diff --git a/lib/utils/flop_benchmark.py b/lib/utils/flop_benchmark.py index ade64d7..df1d6ba 100644 --- a/lib/utils/flop_benchmark.py +++ b/lib/utils/flop_benchmark.py @@ -9,15 +9,19 @@ def count_parameters_in_MB(model): def count_parameters(model_or_parameters, unit="mb"): if isinstance(model_or_parameters, nn.Module): - counts = np.sum(np.prod(v.size()) for v in model_or_parameters.parameters()) + counts = sum(np.prod(v.size()) for v in model_or_parameters.parameters()) + elif isinstance(models_or_parameters, nn.Parameter): + counts = models_or_parameters.numel() + elif isinstance(models_or_parameters, (list, tuple)): + counts = sum(count_parameters(x, None) for x in models_or_parameters) else: - counts = np.sum(np.prod(v.size()) for v in model_or_parameters) - if unit.lower() == "mb": - counts /= 1e6 - elif unit.lower() == "kb": - counts /= 1e3 - elif unit.lower() == "gb": - counts /= 1e9 + counts = sum(np.prod(v.size()) for v in model_or_parameters) + if unit.lower() == "kb" or unit.lower() == "k": + counts /= 2 ** 10 # changed from 1e3 to 2^10 + elif unit.lower() == "mb" or unit.lower() == "m": + counts /= 2 ** 20 # changed from 1e6 to 2^20 + elif unit.lower() == "gb" or unit.lower() == "g": + counts /= 2 ** 30 # changed from 1e9 to 2^30 elif unit is not None: raise ValueError("Unknow unit: {:}".format(unit)) return counts diff --git a/tests/test_torch.sh b/tests/test_torch.sh new file mode 100644 index 0000000..989d682 --- /dev/null +++ b/tests/test_torch.sh @@ -0,0 +1,4 @@ +# bash ./tests/test_torch.sh + +pytest ./tests/test_torch_gpu_bugs.py::test_create -s +CUDA_VISIBLE_DEVICES="" pytest ./tests/test_torch_gpu_bugs.py::test_load -s diff --git a/tests/test_torch_gpu_bugs.py b/tests/test_torch_gpu_bugs.py new file mode 100644 index 0000000..8745e04 --- /dev/null +++ b/tests/test_torch_gpu_bugs.py @@ -0,0 +1,43 @@ +##################################################### +# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # +##################################################### +# pytest ./tests/test_torch_gpu_bugs.py::test_create +# +# CUDA_VISIBLE_DEVICES="" pytest ./tests/test_torch_gpu_bugs.py::test_load +##################################################### +import os, sys, time, torch +import pickle +import tempfile +from pathlib import Path + +lib_dir = (Path(__file__).parent / ".." / "lib").resolve() +print("library path: {:}".format(lib_dir)) +if str(lib_dir) not in sys.path: + sys.path.insert(0, str(lib_dir)) + +from trade_models.quant_transformer import QuantTransformer + + +def test_create(): + """Test the basic quant-model.""" + if not torch.cuda.is_available(): + return + quant_model = QuantTransformer(GPU=0) + temp_dir = lib_dir / ".." / "tests" / ".pytest_cache" + temp_dir.mkdir(parents=True, exist_ok=True) + temp_file = temp_dir / "quant-model.pkl" + with temp_file.open("wb") as f: + # quant_model.to(None) + quant_model.to("cpu") + # del quant_model.model + # del quant_model.train_optimizer + pickle.dump(quant_model, f) + print("save into {:}".format(temp_file)) + + +def test_load(): + temp_file = lib_dir / ".." / "tests" / ".pytest_cache" / "quant-model.pkl" + with temp_file.open("rb") as f: + model = pickle.load(f) + print(model.model) + print(model.train_optimizer)