update missing data of GDAS

2019-09-28 20:10:23 +10:00
parent 0f46f63a25
commit 180702ab8e
22 changed files with 122858 additions and 0 deletions
--- a/others/GDAS/data/GDAS.pdf
+++ b/others/GDAS/data/GDAS.pdf
--- a/others/GDAS/data/GDAS.png
+++ b/others/GDAS/data/GDAS.png
--- a/others/GDAS/data/Get-PTB-WT2.sh
+++ b/others/GDAS/data/Get-PTB-WT2.sh
@@ -0,0 +1,49 @@
+# https://github.com/salesforce/awd-lstm-lm
+#
+# Download the WikiText-2, WikiText-103 and Penn Treebank corpora into ./data
+# and rename their splits to train.txt / valid.txt / test.txt.
+echo "=== Acquiring datasets ==="
+echo "---"
+mkdir -p save
+
+mkdir -p data
+# Every `cd` below aborts the script on failure: otherwise the following
+# `mv` / `cd ..` / `rm -rf` commands would run in the wrong directory.
+cd data || exit 1
+
+echo "- Downloading WikiText-2 (WT2)"
+wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
+unzip -q wikitext-2-v1.zip
+cd wikitext-2 || exit 1
+mv wiki.train.tokens train.txt
+mv wiki.valid.tokens valid.txt
+mv wiki.test.tokens test.txt
+cd ..
+
+echo "- Downloading WikiText-103 (WT103)"
+# No --quiet here, so wget reports progress for this download.
+wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
+unzip -q wikitext-103-v1.zip
+cd wikitext-103 || exit 1
+mv wiki.train.tokens train.txt
+mv wiki.valid.tokens valid.txt
+mv wiki.test.tokens test.txt
+cd ..
+
+echo "- Downloading Penn Treebank (PTB)"
+wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
+tar -xzf simple-examples.tgz
+
+# Word-level Penn Treebank splits.
+mkdir -p penn
+cd penn || exit 1
+mv ../simple-examples/data/ptb.train.txt train.txt
+mv ../simple-examples/data/ptb.test.txt test.txt
+mv ../simple-examples/data/ptb.valid.txt valid.txt
+cd ..
+
+# Character-level Penn Treebank splits come from the same archive.
+echo "- Downloading Penn Treebank (Character)"
+mkdir -p pennchar
+cd pennchar || exit 1
+mv ../simple-examples/data/ptb.char.train.txt train.txt
+mv ../simple-examples/data/ptb.char.test.txt test.txt
+mv ../simple-examples/data/ptb.char.valid.txt valid.txt
+cd ..
+
+# The extracted archive is no longer needed once the splits are moved out.
+rm -rf simple-examples/
+
+echo "---"
+echo "Happy language modeling :)"
--- a/others/GDAS/data/ImageNet-100.txt
+++ b/others/GDAS/data/ImageNet-100.txt
@@ -0,0 +1,100 @@
+n01532829
+n01560419
+n01580077
+n01614925
+n01664065
+n01751748
+n01871265
+n01924916
+n02087394
+n02091134
+n02091244
+n02094433
+n02097209
+n02102040
+n02102480
+n02105251
+n02106662
+n02108422
+n02108551
+n02123597
+n02165105
+n02190166
+n02268853
+n02279972
+n02408429
+n02412080
+n02443114
+n02488702
+n02509815
+n02606052
+n02701002
+n02782093
+n02794156
+n02802426
+n02804414
+n02808440
+n02906734
+n02917067
+n02950826
+n02963159
+n03017168
+n03042490
+n03045698
+n03063689
+n03065424
+n03100240
+n03109150
+n03124170
+n03131574
+n03272562
+n03345487
+n03443371
+n03461385
+n03527444
+n03690938
+n03692522
+n03721384
+n03729826
+n03792782
+n03838899
+n03843555
+n03874293
+n03877472
+n03877845
+n03908618
+n03929660
+n03930630
+n03933933
+n03970156
+n03976657
+n03982430
+n04004767
+n04065272
+n04141975
+n04146614
+n04152593
+n04192698
+n04200800
+n04204347
+n04317175
+n04326547
+n04344873
+n04370456
+n04389033
+n04501370
+n04515003
+n04542943
+n04554684
+n04562935
+n04596742
+n04597913
+n04606251
+n07583066
+n07718472
+n07734744
+n07873807
+n07880968
+n09229709
+n12768682
+n12998815
--- a/others/GDAS/data/README.md
+++ b/others/GDAS/data/README.md
@@ -0,0 +1,15 @@
+# ImageNet
+
+The class names of ImageNet-1K are in `classes.txt`.
+
+# A 100-class subset of ImageNet-1K : ImageNet-100
+
+The class names of ImageNet-100 are in `ImageNet-100.txt`.
+
+Running `python split-imagenet.py` will automatically create ImageNet-100 from the ImageNet-1K data. By default, the ImageNet-1K data is assumed to be located at `~/.torch/ILSVRC2012`. If your data is in a different location, you need to modify line 19 and line 20 in `split-imagenet.py`.
+
+# Tiny-ImageNet
+The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny ImageNet for training.
+
+# PTB and WT2
+Run `bash Get-PTB-WT2.sh` to download the data.
--- a/others/GDAS/data/classes.txt
+++ b/others/GDAS/data/classes.txt
--- a/others/GDAS/data/compress.py
+++ b/others/GDAS/data/compress.py
@@ -0,0 +1,38 @@
+# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar
+# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip
+import os, sys
+from pathlib import Path
+
+
+def command(prefix, cmd):
+  print ('{:}{:}'.format(prefix, cmd))
+  os.system(cmd)
+
+
+def main(source, destination, xtype):
+  assert source.exists(), '{:} does not exist'.format(source)
+  assert (source/'train').exists(), '{:}/train does not exist'.format(source)
+  assert (source/'val'  ).exists(), '{:}/val   does not exist'.format(source)
+  source      = source.resolve()
+  destination = destination.resolve()
+  destination.mkdir(parents=True, exist_ok=True)
+  os.system('rm -rf {:}'.format(destination))
+  destination.mkdir(parents=True, exist_ok=True)
+  (destination/'train').mkdir(parents=True, exist_ok=True)
+
+  subdirs = list( (source / 'train').glob('n*') )
+  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
+  if xtype == 'tar'  : command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source))
+  elif xtype == 'zip': command('', '(cd {:} ; zip -r {:} val)'.format(source, destination/'val.zip'))
+  else: raise ValueError('invalid compress type : {:}'.format(xtype))
+  for idx, subdir in enumerate(subdirs):
+    name = subdir.name
+    if xtype == 'tar'  : command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name))
+    elif xtype == 'zip': command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), '(cd {:}; zip -r {:} {:})'.format(source / 'train', destination/'train'/'{:}.zip'.format(name), name))
+    else: raise ValueError('invalid compress type : {:}'.format(xtype))
+
+
+if __name__ == '__main__':
+  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
+  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
+  main(source, destination, sys.argv[3])
--- a/others/GDAS/data/data/penn/test.txt
+++ b/others/GDAS/data/data/penn/test.txt
--- a/others/GDAS/data/data/penn/train.txt
+++ b/others/GDAS/data/data/penn/train.txt
--- a/others/GDAS/data/data/penn/valid.txt
+++ b/others/GDAS/data/data/penn/valid.txt
--- a/others/GDAS/data/data/wikitext-2/test.txt
+++ b/others/GDAS/data/data/wikitext-2/test.txt
--- a/others/GDAS/data/data/wikitext-2/train.txt
+++ b/others/GDAS/data/data/wikitext-2/train.txt
--- a/others/GDAS/data/data/wikitext-2/valid.txt
+++ b/others/GDAS/data/data/wikitext-2/valid.txt
--- a/others/GDAS/data/decompress.py
+++ b/others/GDAS/data/decompress.py
@@ -0,0 +1,94 @@
+# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar
+# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip
+import os, gc, sys
+from pathlib import Path
+import multiprocessing
+
+
+def execute(cmds, idx, num):
+  #print ('{:03d} :: {:03d} :: {:03d}'.format(idx, num, len(cmds)))
+  for i, cmd in enumerate(cmds):
+    if i % num == idx:
+      print ('{:03d} :: {:03d} :: {:03d}/{:03d} : {:}'.format(idx, num, i, len(cmds), cmd))
+      os.system(cmd)
+
+
+def command(prefix, cmd):
+  #print ('{:}{:}'.format(prefix, cmd))
+  #if execute: os.system(cmd)
+  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
+  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
+  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
+  xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
+  return xcmd
+
+
+def mkILSVRC2012(destination):
+  destination = destination.resolve()
+  destination.mkdir(parents=True, exist_ok=True)
+  os.system('rm -rf {:}'.format(destination))
+  destination.mkdir(parents=True, exist_ok=True)
+  (destination/'train').mkdir(parents=True, exist_ok=True)
+
+
+def main(source, destination, xtype):
+  assert source.exists(), '{:} does not exist'.format(source)
+  assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source)
+  if xtype == 'tar'  : assert (source/'val.tar').exists(), '{:}/val   does not exist'.format(source)
+  elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val   does not exist'.format(source)
+  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
+  #assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
+  source      = source.resolve()
+  mkILSVRC2012(destination)
+
+  subdirs = list( (source / 'train').glob('n*') )
+  all_commands = []
+  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
+  for idx, subdir in enumerate(subdirs):
+    name = subdir.name
+    if xtype == 'tar'  : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
+    elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
+    else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
+    all_commands.append( cmd )
+  if xtype == 'tar'  : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
+  elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
+  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
+  all_commands.append( cmd )
+  #print ('Collect all commands done : {:} lines'.format( len(all_commands) ))
+
+  for i, cmd in enumerate(all_commands):
+    print(cmd)
+  #  os.system(cmd)
+  #  print ('{:03d}/{:03d} : {:}'.format(i, len(all_commands), cmd))
+  #  gc.collect()
+
+  """
+  records = []
+  for i in range(num_process):
+    process = multiprocessing.Process(target=execute, args=(all_commands, i, num_process))
+    process.start()
+    records.append(process)
+  for process in records:
+    process.join()
+  """
+
+
+if __name__ == '__main__':
+  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
+  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
+  #num_process = int(sys.argv[3])
+  if sys.argv[3] == 'wget':
+    with open(source) as f:
+      content = f.readlines()
+    content = [x.strip() for x in content]
+    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
+    mkILSVRC2012(destination)
+    all_commands = []
+    cmd = command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, destination / 'val.tar', destination, destination / 'val.tar'))
+    all_commands.append(cmd)
+    for idx, name in enumerate(content):
+      cmd = command('{:03d}/{:03d}-th: '.format(idx, len(content)), 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, destination / 'train', destination / 'train' / name, destination / 'train', destination / 'train' / name))
+      all_commands.append(cmd)
+    for i, cmd in enumerate(all_commands): print(cmd)
+  else:
+    main(source, destination, sys.argv[3])
--- a/others/GDAS/data/imagenet-results.png
+++ b/others/GDAS/data/imagenet-results.png
--- a/others/GDAS/data/load_data_CUHK-PEDES.py
+++ b/others/GDAS/data/load_data_CUHK-PEDES.py
@@ -0,0 +1,15 @@
+import json
+
+def main():
+  xpath = 'caption_all.json'
+  with open(xpath, 'r') as cfile:
+    cap_data = json.load(cfile)
+  print ('There are {:} images'.format( len(cap_data) ))
+  IDs = set()
+  for idx, data in enumerate( cap_data ):
+    IDs.add( data['id'] )
+    assert len( data['captions'] ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, data['captions'], len(data['captions']))
+  print ('IDs :: min={:}, max={:}, num={:}'.format(min(IDs), max(IDs), len(IDs)))
+
+# Run the caption-file check only when this file is executed as a script.
+if __name__ == '__main__':
+  main()
--- a/others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
+++ b/others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
--- a/others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
+++ b/others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
--- a/others/GDAS/data/ps_mem.py
+++ b/others/GDAS/data/ps_mem.py
@@ -0,0 +1,661 @@
+#!/usr/bin/env python
+
+# Try to determine how much RAM is currently being used per program.
+# Note per _program_, not per process. So for example this script
+# will report RAM used by all httpd process together. In detail it reports:
+# sum(private RAM for program processes) + sum(Shared RAM for program processes)
+# The shared RAM is problematic to calculate, and this script automatically
+# selects the most accurate method available for your kernel.
+
+# Licence: LGPLv2
+# Author:  P@draigBrady.com
+# Source:  http://www.pixelbeat.org/scripts/ps_mem.py
+
+# V1.0      06 Jul 2005     Initial release
+# V1.1      11 Aug 2006     root permission required for accuracy
+# V1.2      08 Nov 2006     Add total to output
+#                           Use KiB,MiB,... for units rather than K,M,...
+# V1.3      22 Nov 2006     Ignore shared col from /proc/$pid/statm for
+#                           2.6 kernels up to and including 2.6.9.
+#                           There it represented the total file backed extent
+# V1.4      23 Nov 2006     Remove total from output as it's meaningless
+#                           (the shared values overlap with other programs).
+#                           Display the shared column. This extra info is
+#                           useful, especially as it overlaps between programs.
+# V1.5      26 Mar 2007     Remove redundant recursion from human()
+# V1.6      05 Jun 2007     Also report number of processes with a given name.
+#                           Patch from riccardo.murri@gmail.com
+# V1.7      20 Sep 2007     Use PSS from /proc/$pid/smaps if available, which
+#                           fixes some over-estimation and allows totalling.
+#                           Enumerate the PIDs directly rather than using ps,
+#                           which fixes the possible race between reading
+#                           RSS with ps, and shared memory with this program.
+#                           Also we can show non truncated command names.
+# V1.8      28 Sep 2007     More accurate matching for stats in /proc/$pid/smaps
+#                           as otherwise could match libraries causing a crash.
+#                           Patch from patrice.bouchand.fedora@gmail.com
+# V1.9      20 Feb 2008     Fix invalid values reported when PSS is available.
+#                           Reported by Andrey Borzenkov <arvidjaar@mail.ru>
+# V3.13     17 Sep 2018
+#   http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py
+
+# Notes:
+#
+# All interpreted programs where the interpreter is started
+# by the shell or with env, will be merged to the interpreter
+# (as that's what's given to exec). For e.g. all python programs
+# starting with "#!/usr/bin/env python" will be grouped under python.
+# You can change this by using the full command line but that will
+# have the undesirable affect of splitting up programs started with
+# differing parameters (for e.g. mingetty tty[1-6]).
+#
+# For 2.6 kernels up to and including 2.6.13 and later 2.4 redhat kernels
+# (rmap vm without smaps) it can not be accurately determined how many pages
+# are shared between processes in general or within a program in our case:
+# http://lkml.org/lkml/2005/7/6/250
+# A warning is printed if overestimation is possible.
+# In addition for 2.6 kernels up to 2.6.9 inclusive, the shared
+# value in /proc/$pid/statm is the total file-backed extent of a process.
+# We ignore that, introducing more overestimation, again printing a warning.
+# Since kernel 2.6.23-rc8-mm1 PSS is available in smaps, which allows
+# us to calculate a more accurate value for the total RAM used by programs.
+#
+# Programs that use CLONE_VM without CLONE_THREAD are discounted by assuming
+# they're the only programs that have the same /proc/$PID/smaps file for
+# each instance.  This will fail if there are multiple real instances of a
+# program that then use CLONE_VM without CLONE_THREAD, or if a clone changes
+# its memory map while we're checksumming each /proc/$PID/smaps.
+#
+# I don't take account of memory allocated for a program
+# by other programs. For e.g. memory used in the X server for
+# a program could be determined, but is not.
+#
+# FreeBSD is supported if linprocfs is mounted at /compat/linux/proc/
+# FreeBSD 8.0 supports up to a level of Linux 2.6.16
+
+import getopt
+import time
+import errno
+import os
+import sys
+
+# The following exits cleanly on Ctrl-C or EPIPE
+# while treating other exceptions as before.
+def std_exceptions(etype, value, tb):
+    sys.excepthook = sys.__excepthook__
+    if issubclass(etype, KeyboardInterrupt):
+        pass
+    elif issubclass(etype, IOError) and value.errno == errno.EPIPE:
+        pass
+    else:
+        sys.__excepthook__(etype, value, tb)
+sys.excepthook = std_exceptions
+
+#
+#   Define some global variables
+#
+
+# Size of one memory page in KiB; page counts read from statm are
+# multiplied by this value.
+PAGESIZE = os.sysconf("SC_PAGE_SIZE") / 1024 #KiB
+# PID of this script, used to include or exclude ourselves from the report.
+our_pid = os.getpid()
+
+# Set to 1 by getMemStats() once a "Pss" / "SwapPss:" line has been seen
+# in a process' smaps data.
+have_pss = 0
+have_swap_pss = 0
+
+class Unbuffered(object):
+   def __init__(self, stream):
+       self.stream = stream
+   def write(self, data):
+       self.stream.write(data)
+       self.stream.flush()
+   def close(self):
+       self.stream.close()
+   def flush(self):
+      self.stream.flush()
+
+class Proc:
+    def __init__(self):
+        uname = os.uname()
+        if uname[0] == "FreeBSD":
+            self.proc = '/compat/linux/proc'
+        else:
+            self.proc = '/proc'
+
+    def path(self, *args):
+        return os.path.join(self.proc, *(str(a) for a in args))
+
+    def open(self, *args):
+        try:
+            if sys.version_info < (3,):
+                return open(self.path(*args))
+            else:
+                return open(self.path(*args), errors='ignore')
+        except (IOError, OSError):
+            val = sys.exc_info()[1]
+            if (val.errno == errno.ENOENT or # kernel thread or process gone
+                val.errno == errno.EPERM or
+                val.errno == errno.EACCES):
+                raise LookupError
+            raise
+
+proc = Proc()
+
+
+#
+#   Functions
+#
+
+def parse_options():
+    try:
+        long_options = [
+            'split-args',
+            'help',
+            'version',
+            'total',
+            'discriminate-by-pid',
+            'swap'
+        ]
+        opts, args = getopt.getopt(sys.argv[1:], "shtdSp:w:", long_options)
+    except getopt.GetoptError:
+        sys.stderr.write(help())
+        sys.exit(3)
+
+    if len(args):
+        sys.stderr.write("Extraneous arguments: %s\n" % args)
+        sys.exit(3)
+
+    # ps_mem.py options
+    split_args = False
+    pids_to_show = None
+    discriminate_by_pid = False
+    show_swap = False
+    watch = None
+    only_total = False
+
+    for o, a in opts:
+        if o in ('-s', '--split-args'):
+            split_args = True
+        if o in ('-t', '--total'):
+            only_total = True
+        if o in ('-d', '--discriminate-by-pid'):
+            discriminate_by_pid = True
+        if o in ('-S', '--swap'):
+            show_swap = True
+        if o in ('-h', '--help'):
+            sys.stdout.write(help())
+            sys.exit(0)
+        if o in ('--version'):
+            sys.stdout.write('3.13'+'\n')
+            sys.exit(0)
+        if o in ('-p',):
+            try:
+                pids_to_show = [int(x) for x in a.split(',')]
+            except:
+                sys.stderr.write(help())
+                sys.exit(3)
+        if o in ('-w',):
+            try:
+                watch = int(a)
+            except:
+                sys.stderr.write(help())
+                sys.exit(3)
+
+    return (
+        split_args,
+        pids_to_show,
+        watch,
+        only_total,
+        discriminate_by_pid,
+        show_swap
+    )
+
+
+def help():
+    help_msg = 'Usage: ps_mem [OPTION]...\n' \
+        'Show program core memory usage\n' \
+        '\n' \
+        '  -h, -help                   Show this help\n' \
+        '  -p <pid>[,pid2,...pidN]     Only show memory usage PIDs in the '\
+        'specified list\n' \
+        '  -s, --split-args            Show and separate by, all command line'\
+        ' arguments\n' \
+        '  -t, --total                 Show only the total value\n' \
+        '  -d, --discriminate-by-pid   Show by process rather than by program\n' \
+        '  -S, --swap                  Show swap information\n' \
+        '  -w <N>                      Measure and show process memory every'\
+        ' N seconds\n'
+
+    return help_msg
+
+
+# (major,minor,release)
+def kernel_ver():
+    kv = proc.open('sys/kernel/osrelease').readline().split(".")[:3]
+    last = len(kv)
+    if last == 2:
+        kv.append('0')
+    last -= 1
+    while last > 0:
+        for char in "-_":
+            kv[last] = kv[last].split(char)[0]
+        try:
+            int(kv[last])
+        except:
+            kv[last] = 0
+        last -= 1
+    return (int(kv[0]), int(kv[1]), int(kv[2]))
+
+
+#return Private,Shared,Swap(Pss),unique_id
+#Note shared is always a subset of rss (trs is not always)
+def getMemStats(pid):
+    """Collect the memory figures of process ``pid`` from its proc files.
+
+    Returns the tuple ``(Private, Shared, Swap, mem_id)``.  When the smaps
+    file exists, the values are summed from its "Private", "Shared", "Pss"
+    and swap lines and ``mem_id`` is a hash of the file content; otherwise
+    the figures come from statm and ``mem_id`` is the pid.  Sets the
+    module-level flags ``have_pss`` / ``have_swap_pss`` when the matching
+    lines are seen.
+    """
+    global have_pss
+    global have_swap_pss
+    mem_id = pid #unique
+    Private_lines = []
+    Shared_lines = []
+    Pss_lines = []
+    # second statm field, converted from pages with PAGESIZE
+    Rss = (int(proc.open(pid, 'statm').readline().split()[1])
+           * PAGESIZE)
+    Swap_lines = []
+    Swap_pss_lines = []
+
+    Swap = 0
+
+    if os.path.exists(proc.path(pid, 'smaps')):  # stat
+        smaps = 'smaps'
+        if os.path.exists(proc.path(pid, 'smaps_rollup')):
+            smaps = 'smaps_rollup' # faster to process
+        lines = proc.open(pid, smaps).readlines()  # open
+        # Note we checksum smaps as maps is usually but
+        # not always different for separate processes.
+        mem_id = hash(''.join(lines))
+        # Sort the lines into buckets by their leading keyword
+        for line in lines:
+            if line.startswith("Shared"):
+                Shared_lines.append(line)
+            elif line.startswith("Private"):
+                Private_lines.append(line)
+            elif line.startswith("Pss"):
+                have_pss = 1
+                Pss_lines.append(line)
+            elif line.startswith("Swap:"):
+                Swap_lines.append(line)
+            elif line.startswith("SwapPss:"):
+                have_swap_pss = 1
+                Swap_pss_lines.append(line)
+        # The value is the second whitespace-separated field of each line
+        Shared = sum([int(line.split()[1]) for line in Shared_lines])
+        Private = sum([int(line.split()[1]) for line in Private_lines])
+        #Note Shared + Private = Rss above
+        #The Rss in smaps includes video card mem etc.
+        if have_pss:
+            pss_adjust = 0.5 # add 0.5KiB as this avg error due to truncation
+            Pss = sum([float(line.split()[1])+pss_adjust for line in Pss_lines])
+            Shared = Pss - Private
+        if have_swap_pss:
+            # The kernel supports SwapPss, that shows proportional swap share.
+            # Note that Swap - SwapPss is not Private Swap.
+            Swap = sum([int(line.split()[1]) for line in Swap_pss_lines])
+        else:
+            # Note that Swap = Private swap + Shared swap.
+            Swap = sum([int(line.split()[1]) for line in Swap_lines])
+    elif (2,6,1) <= kernel_ver() <= (2,6,9):
+        Shared = 0 #lots of overestimation, but what can we do?
+        Private = Rss
+    else:
+        # No smaps: take the shared page count from the third statm field
+        Shared = int(proc.open(pid, 'statm').readline().split()[2])
+        Shared *= PAGESIZE
+        Private = Rss - Shared
+    return (Private, Shared, Swap, mem_id)
+
+
+def getCmdName(pid, split_args, discriminate_by_pid, exe_only=False):
+    """Return the display name of process ``pid``.
+
+    With ``split_args`` the whole command line is returned.  Otherwise the
+    name is derived from the ``exe`` link and the first line of ``status``;
+    when the two do not match, the parent's executable name is used if it
+    equals ours.  ``exe_only`` returns the basename of the executable
+    directly, and ``discriminate_by_pid`` appends " [pid]" to the result.
+
+    Raises LookupError when the ``exe`` link cannot be read because of
+    ENOENT, EPERM or EACCES.
+    """
+    cmdline = proc.open(pid, 'cmdline').read().split("\0")
+    # Drop the empty element left by the trailing NUL
+    if cmdline[-1] == '' and len(cmdline) > 1:
+        cmdline = cmdline[:-1]
+
+    path = proc.path(pid, 'exe')
+    try:
+        path = os.readlink(path)
+        # Some symlink targets were seen to contain NULs on RHEL 5 at least
+        # https://github.com/pixelb/scripts/pull/10, so take string up to NUL
+        path = path.split('\0')[0]
+    except OSError:
+        val = sys.exc_info()[1]
+        if (val.errno == errno.ENOENT or # either kernel thread or process gone
+            val.errno == errno.EPERM or
+            val.errno == errno.EACCES):
+            raise LookupError
+        raise
+
+    if split_args:
+        return ' '.join(cmdline).replace('\n', ' ')
+    if path.endswith(" (deleted)"):
+        path = path[:-10]  # strip the " (deleted)" suffix
+        if os.path.exists(path):
+            path += " [updated]"
+        else:
+            #The path could be have prelink stuff so try cmdline
+            #which might have the full path present. This helped for:
+            #/usr/libexec/notification-area-applet.#prelink#.fX7LCT (deleted)
+            if os.path.exists(cmdline[0]):
+                path = cmdline[0] + " [updated]"
+            else:
+                path += " [deleted]"
+    exe = os.path.basename(path)
+    if exe_only: return exe
+
+    proc_status = proc.open(pid, 'status').readlines()
+    # First status line without its first 6 characters and trailing newline
+    cmd = proc_status[0][6:-1]
+    if exe.startswith(cmd):
+        cmd = exe #show non truncated version
+        #Note because we show the non truncated name
+        #one can have separated programs as follows:
+        #584.0 KiB +   1.0 MiB =   1.6 MiB    mozilla-thunder (exe -> bash)
+        # 56.0 MiB +  22.2 MiB =  78.2 MiB    mozilla-thunderbird-bin
+    else:
+        #Lookup the parent's exe and use that if matching
+        #which will merge "Web Content" with "firefox" for example
+        ppid = 0
+        # Only the first 10 status lines are searched for the PPid entry
+        for l in range(10):
+            ps_line = proc_status[l]
+            if ps_line.startswith('PPid:'):
+                ppid = int(ps_line[6:-1])
+                break
+        if ppid:
+            p_exe = getCmdName(ppid, False, False, exe_only=True)
+            if exe == p_exe:
+                cmd = exe
+    if sys.version_info >= (3,):
+        # Round-trip through bytes so unencodable characters get replaced
+        cmd = cmd.encode(errors='replace').decode()
+    if discriminate_by_pid:
+        cmd = '%s [%d]' % (cmd, pid)
+    return cmd
+
+
+#The following matches "du -h" output
+#see also human.py
+def human(num, power="Ki", units=None):
+    if units is None:
+        powers = ["Ki", "Mi", "Gi", "Ti"]
+        while num >= 1000: #4 digits
+            num /= 1024.0
+            power = powers[powers.index(power)+1]
+        return "%.1f %sB" % (num, power)
+    else:
+        return "%.f" % ((num * 1024) / units)
+
+
+def cmd_with_count(cmd, count):
+    if count > 1:
+        return "%s (%u)" % (cmd, count)
+    else:
+        return cmd
+
+#Warn of possible inaccuracies
+#RAM:
+#2 = accurate & can total
+#1 = accurate only considering each process in isolation
+#0 = some shared mem not reported
+#-1= all shared mem not reported
+#SWAP:
+#2 = accurate & can total
+#1 = accurate only considering each process in isolation
+#-1= not available
+def val_accuracy(show_swap):
+    """http://wiki.apache.org/spamassassin/TopSharedMemoryBug"""
+    kv = kernel_ver()
+    pid = os.getpid()
+    swap_accuracy = -1
+    if kv[:2] == (2,4):
+        if proc.open('meminfo').read().find("Inact_") == -1:
+            return 1, swap_accuracy
+        return 0, swap_accuracy
+    elif kv[:2] == (2,6):
+        if os.path.exists(proc.path(pid, 'smaps')):
+            swap_accuracy = 1
+            if proc.open(pid, 'smaps').read().find("Pss:")!=-1:
+                return 2, swap_accuracy
+            else:
+                return 1, swap_accuracy
+        if (2,6,1) <= kv <= (2,6,9):
+            return -1, swap_accuracy
+        return 0, swap_accuracy
+    elif kv[0] > 2 and os.path.exists(proc.path(pid, 'smaps')):
+        swap_accuracy = 1
+        if show_swap and proc.open(pid, 'smaps').read().find("SwapPss:")!=-1:
+            swap_accuracy = 2
+        return 2, swap_accuracy
+    else:
+        return 1, swap_accuracy
+
+def show_val_accuracy( ram_inacc, swap_inacc, only_total, show_swap ):
+    level = ("Warning","Error")[only_total]
+
+    # Only show significant warnings
+    if not show_swap:
+        swap_inacc = 2
+    elif only_total:
+        ram_inacc = 2
+
+    if ram_inacc == -1:
+        sys.stderr.write(
+         "%s: Shared memory is not reported by this system.\n" % level
+        )
+        sys.stderr.write(
+         "Values reported will be too large, and totals are not reported\n"
+        )
+    elif ram_inacc == 0:
+        sys.stderr.write(
+         "%s: Shared memory is not reported accurately by this system.\n" % level
+        )
+        sys.stderr.write(
+         "Values reported could be too large, and totals are not reported\n"
+        )
+    elif ram_inacc == 1:
+        sys.stderr.write(
+         "%s: Shared memory is slightly over-estimated by this system\n"
+         "for each program, so totals are not reported.\n" % level
+        )
+
+    if swap_inacc == -1:
+        sys.stderr.write(
+         "%s: Swap is not reported by this system.\n" % level
+        )
+    elif swap_inacc == 1:
+        sys.stderr.write(
+         "%s: Swap is over-estimated by this system for each program,\n"
+         "so totals are not reported.\n" % level
+        )
+
+    sys.stderr.close()
+    if only_total:
+        if show_swap:
+            accuracy = swap_inacc
+        else:
+            accuracy = ram_inacc
+        if accuracy != 2:
+            sys.exit(1)
+
+
+def get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
+                     include_self=False, only_self=False):
+    cmds = {}
+    shareds = {}
+    mem_ids = {}
+    count = {}
+    swaps = {}
+    for pid in os.listdir(proc.path('')):
+        if not pid.isdigit():
+            continue
+        pid = int(pid)
+
+        # Some filters
+        if only_self and pid != our_pid:
+            continue
+        if pid == our_pid and not include_self:
+            continue
+        if pids_to_show is not None and pid not in pids_to_show:
+            continue
+
+        try:
+            cmd = getCmdName(pid, split_args, discriminate_by_pid)
+        except LookupError:
+            #operation not permitted
+            #kernel threads don't have exe links or
+            #process gone
+            continue
+
+        try:
+            private, shared, swap, mem_id = getMemStats(pid)
+        except RuntimeError:
+            continue #process gone
+        if shareds.get(cmd):
+            if have_pss: #add shared portion of PSS together
+                shareds[cmd] += shared
+            elif shareds[cmd] < shared: #just take largest shared val
+                shareds[cmd] = shared
+        else:
+            shareds[cmd] = shared
+        cmds[cmd] = cmds.setdefault(cmd, 0) + private
+        if cmd in count:
+            count[cmd] += 1
+        else:
+            count[cmd] = 1
+        mem_ids.setdefault(cmd, {}).update({mem_id: None})
+
+        # Swap (overcounting for now...)
+        swaps[cmd] = swaps.setdefault(cmd, 0) + swap
+
+    # Total swaped mem for each program
+    total_swap = 0
+
+    # Add shared mem for each program
+    total = 0
+
+    for cmd in cmds:
+        cmd_count = count[cmd]
+        if len(mem_ids[cmd]) == 1 and cmd_count > 1:
+            # Assume this program is using CLONE_VM without CLONE_THREAD
+            # so only account for one of the processes
+            cmds[cmd] /= cmd_count
+            if have_pss:
+                shareds[cmd] /= cmd_count
+        cmds[cmd] = cmds[cmd] + shareds[cmd]
+        total += cmds[cmd]  # valid if PSS available
+        total_swap += swaps[cmd]
+
+    sorted_cmds = sorted(cmds.items(), key=lambda x:x[1])
+    sorted_cmds = [x for x in sorted_cmds if x[1]]
+
+    return sorted_cmds, shareds, count, total, swaps, total_swap
+
+def print_header(show_swap, discriminate_by_pid):
+    """Write the column-heading line of the usage table to stdout.
+
+    show_swap: when true, a "Swap used" heading is included.
+    discriminate_by_pid: when true, the program column is titled
+    "Program[pid]" instead of "Program".
+
+    The heading line is followed by one empty line.
+    """
+    pieces = [" Private  +   Shared  =  RAM used"]
+    if show_swap:
+        pieces.append("   Swap used")
+    pieces.append("\tProgram")
+    if discriminate_by_pid:
+        pieces.append("[pid]")
+    pieces.append("\n\n")
+    sys.stdout.write("".join(pieces))
+
+
+def print_memory_usage(sorted_cmds, shareds, count, total, swaps, total_swap,
+                       show_swap):
+    """Write one table row per program, then a totals footer, to stdout.
+
+    sorted_cmds: sequence of (program, ram_used) pairs, printed in order.
+    shareds, count, swaps: dicts keyed by program giving its shared
+    memory, process count and swap usage.
+    total, total_swap: overall figures shown in the footer.
+    show_swap: when true, each row (and the footer, if the module-level
+    have_swap_pss flag is set) also carries a swap column.
+    """
+    for program, ram_used in sorted_cmds:
+        shared = shareds[program]
+        row_format = "%9s + %9s = %9s"
+        fields = [human(ram_used - shared), human(shared), human(ram_used)]
+        if show_swap:
+            row_format += "   %9s"
+            fields.append(human(swaps[program]))
+        row_format += "\t%s\n"
+        fields.append(cmd_with_count(program, count[program]))
+
+        sys.stdout.write(row_format % tuple(fields))
+
+    # Only show totals if appropriate
+    if have_swap_pss and show_swap:  # kernel will have_pss
+        rule_width = 45
+        totals = "%s%9s%s%9s" % (" " * 24, human(total), " " * 3,
+                                 human(total_swap))
+    elif have_pss:
+        rule_width = 33
+        totals = "%s%9s" % (" " * 24, human(total))
+    else:
+        return
+    sys.stdout.write("%s\n%s\n%s\n" %
+                     ("-" * rule_width, totals, "=" * rule_width))
+
+
+def verify_environment(pids_to_show):
+    """Exit early when the tool cannot run in the current environment.
+
+    Exits with status 1 when the effective uid is not 0 and no pids were
+    requested. Exits with status 2 when kernel_ver() fails with ENOENT.
+    Any other IOError/OSError raised by kernel_ver() is re-raised.
+    """
+    is_root = os.geteuid() == 0
+    if not (is_root or pids_to_show):
+        sys.stderr.write("Sorry, root permission required, or specify pids with -p\n")
+        sys.stderr.close()
+        sys.exit(1)
+
+    try:
+        kernel_ver()
+    except (IOError, OSError):
+        # The exception object is fetched through sys.exc_info() instead of
+        # an "except ... as" clause.
+        err = sys.exc_info()[1]
+        if err.errno != errno.ENOENT:
+            raise
+        sys.stderr.write(
+          "Couldn't access " + proc.path('') + "\n"
+          "Only GNU/Linux and FreeBSD (with linprocfs) are supported\n")
+        sys.exit(2)
+
+def _report_memory_usage(pids_to_show, split_args, discriminate_by_pid,
+                         only_total, show_swap):
+    """Take one memory snapshot and write it to stdout.
+
+    With only_total set, a single total is written (swap or RAM, depending
+    on show_swap) provided the matching have_swap_pss / have_pss flag is
+    set; otherwise the full per-program table is printed.
+
+    Returns the sorted command list produced by get_memory_usage(), which
+    is empty when no process contributed any memory.
+    """
+    sorted_cmds, shareds, count, total, swaps, total_swap = \
+        get_memory_usage(pids_to_show, split_args, discriminate_by_pid)
+    if only_total and show_swap and have_swap_pss:
+        sys.stdout.write(human(total_swap, units=1)+'\n')
+    elif only_total and not show_swap and have_pss:
+        sys.stdout.write(human(total, units=1)+'\n')
+    elif not only_total:
+        print_memory_usage(sorted_cmds, shareds, count, total, swaps,
+                           total_swap, show_swap)
+    return sorted_cmds
+
+
+def main():
+    """Command-line entry point: parse options, then report memory usage.
+
+    When a watch interval was given, a snapshot is printed repeatedly with
+    a sleep of that many seconds in between, until a snapshot comes back
+    empty or the user interrupts with Ctrl-C. Otherwise exactly one
+    snapshot is printed.
+    """
+    # Force the stdout and stderr streams to be unbuffered
+    sys.stdout = Unbuffered(sys.stdout)
+    sys.stderr = Unbuffered(sys.stderr)
+
+    split_args, pids_to_show, watch, only_total, discriminate_by_pid, \
+    show_swap = parse_options()
+
+    verify_environment(pids_to_show)
+
+    if not only_total:
+        print_header(show_swap, discriminate_by_pid)
+
+    if watch is not None:
+        try:
+            sorted_cmds = True
+            while sorted_cmds:
+                sorted_cmds = _report_memory_usage(
+                    pids_to_show, split_args, discriminate_by_pid,
+                    only_total, show_swap)
+
+                sys.stdout.flush()
+                time.sleep(watch)
+            else:
+                # Reached once a snapshot returned no commands.
+                sys.stdout.write('Process does not exist anymore.\n')
+        except KeyboardInterrupt:
+            pass
+    else:
+        # This is the default behavior
+        _report_memory_usage(pids_to_show, split_args, discriminate_by_pid,
+                             only_total, show_swap)
+
+    # We must close explicitly, so that any EPIPE exception
+    # is handled by our excepthook, rather than the default
+    # one which is reenabled after this script finishes.
+    sys.stdout.close()
+
+    ram_accuracy, swap_accuracy = val_accuracy( show_swap )
+    show_val_accuracy( ram_accuracy, swap_accuracy, only_total, show_swap )
+
+# Run the tool only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
--- a/others/GDAS/data/show-queue.sh
+++ b/others/GDAS/data/show-queue.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Print the `showjob` listing of every cluster queue, in three sections.
+
+# banner TITLE: print TITLE between two dashed rules.
+banner() {
+  echo '-------------------------------'
+  echo "$1"
+  echo '-------------------------------'
+}
+
+# show_queues NAME...: run `showjob -p NAME` for each queue in order,
+# sleeping 0.3s after each call.
+show_queues() {
+  local queue
+  for queue in "$@"; do
+    showjob -p "${queue}"
+    sleep 0.3s
+  done
+}
+
+# Show High-priority
+banner 'Queue in high-priority clusters'
+show_queues yq01-v100-box-1-8 yq01-v100-box-idl-2-8
+
+banner 'Queue in low-priority clusters'
+#queues="yq01-p40-3-8 yq01-p40-2-8 yq01-p40-box-1-8 yq01-v100-box-2-8"
+show_queues yq01-p40-3-8 yq01-p40-box-1-8 yq01-v100-box-2-8
+
+
+banner 'Queue for other IDL teams'
+show_queues yq01-v100-box-idl-8 yq01-v100-box-idl-3-8
--- a/others/GDAS/data/split-imagenet.py
+++ b/others/GDAS/data/split-imagenet.py
@@ -0,0 +1,37 @@
+import os, sys, random
+from pathlib import Path
+
+
+def sample_100_cls():
+  """Pick 100 class names from classes.txt and save them to ImageNet-100.txt.
+
+  Reads one name per line from classes.txt in the current directory, draws
+  100 of them with the random seed fixed to 111, and writes the selection,
+  sorted and one per line, to ImageNet-100.txt. A line of 100 dashes is
+  printed when done.
+  """
+  with open('classes.txt') as src:
+    names = [line.strip() for line in src.readlines()]
+  random.seed(111)
+  chosen = sorted(random.sample(names, 100))
+  with open('ImageNet-100.txt', 'w') as dst:
+    dst.writelines('{:}\n'.format(name) for name in chosen)
+  print('-'*100)
+
+
+if __name__ == "__main__":
+  # Build ~/.torch/ILSVRC2012-100 as a tree of symlinks to the class folders
+  # of ~/.torch/ILSVRC2012 that are listed in ImageNet-100.txt.
+  # (Uncomment the next line to regenerate ImageNet-100.txt first.)
+  #sample_100_cls()
+  IN1K_root = Path.home() / '.torch' / 'ILSVRC2012'
+  IN100_root = Path.home() / '.torch' / 'ILSVRC2012-100'
+  if not IN1K_root.exists():
+    raise FileNotFoundError('ImageNet directory does not exist : {:}'.format(IN1K_root))
+  print ('Create soft link from ImageNet directory into : {:}'.format(IN100_root))
+  with open('ImageNet-100.txt', 'r') as f:
+    # Drop blank lines: an empty class name would resolve to the split
+    # directory itself and link the whole of train/ or val/.
+    classes = [x.strip() for x in f if x.strip()]
+  for sub in ['train', 'val']:
+    (IN100_root / sub).mkdir(parents=True, exist_ok=True)
+
+  for cls in classes:
+    for sub in ['train', 'val']:
+      src = IN1K_root / sub / cls
+      if not src.exists():
+        raise FileNotFoundError('{:} does not exist'.format(src))
+      dst = IN100_root / sub / cls
+      # Skip links left by a previous run. Shelling out to `ln -s` here would
+      # follow the existing link and create a nested link inside the source
+      # class directory instead.
+      if dst.is_symlink() or dst.exists():
+        continue
+      dst.symlink_to(src, target_is_directory=True)
--- a/others/GDAS/data/tiny-imagenet.py
+++ b/others/GDAS/data/tiny-imagenet.py
@@ -0,0 +1,53 @@
+import os, sys
+from pathlib import Path
+
+# Location of the Tiny-ImageNet-200 archive; main() hands it to wget.
+url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
+
+def load_val(path='tiny-imagenet-200/val/val_annotations.txt'):
+  """Read image names and class labels from a validation annotation file.
+
+  Every non-blank line is split on tabs; the first field is taken as the
+  image file name and the second as its label.
+
+  Args:
+    path: annotation file to read. Defaults to the file inside the extracted
+      tiny-imagenet-200 directory.
+
+  Returns:
+    (images, labels): two parallel lists of strings, in file order.
+
+  Raises:
+    IndexError: if a non-blank line has fewer than two tab-separated fields.
+  """
+  images, labels = [], []
+  # `with` closes the file even when a malformed line raises.
+  with open(path, 'r') as cfile:
+    for line in cfile:
+      line = line.strip()
+      if not line: continue  # tolerate blank / trailing empty lines
+      fields = line.split('\t')
+      images.append(fields[0])
+      labels.append(fields[1])
+  return images, labels
+
+def main():
+  """Download Tiny-ImageNet-200 and regroup its validation images by class.
+
+  Steps:
+    1. Fetch the archive at `url` with wget and extract it with unzip,
+       after removing any previous tiny-imagenet-200 directory.
+    2. Copy every validation image into tiny-imagenet-200/new_val/<label>/,
+       using the labels returned by load_val().
+    3. Replace tiny-imagenet-200/val with that per-class tree.
+
+  Raises:
+    subprocess.CalledProcessError: if wget or unzip exits with a non-zero
+      status, so a failed download stops the run before anything is deleted.
+  """
+  import shutil
+  import subprocess
+
+  root = 'tiny-imagenet-200'
+  subprocess.check_call(['wget', url])
+  shutil.rmtree(root, ignore_errors=True)  # like `rm -rf`: fine if absent
+  subprocess.check_call(['unzip', '-o', 'tiny-imagenet-200.zip'])
+  images, labels = load_val()
+  savedir = root + '/new_val'
+  os.makedirs(savedir, exist_ok=True)
+  for image, label in zip(images, labels):
+    cdir = savedir + '/' + label
+    os.makedirs(cdir, exist_ok=True)
+    ori_path = root + '/val/images/' + image
+    # Copy in-process instead of spawning one shell per image.
+    shutil.copy(ori_path, cdir)
+  shutil.rmtree(root + '/val')
+  shutil.move(savedir, root + '/val')
+
+def generate_salt_pepper():
+  """Write a salt-and-pepper-noised copy of every validation image.
+
+  For each sub-directory of tiny-imagenet-200/val, every *.JPEG file is read
+  with OpenCV, passed through imgaug's SaltAndPepper(p=0.2) augmenter and
+  saved under the same name in tiny-imagenet-200/val-noise/<sub-directory>/.
+  A "<sub-directory> done" line is printed after each one is processed.
+  """
+  src_root = Path('tiny-imagenet-200/val')
+  dst_root = Path('tiny-imagenet-200/val-noise')
+  assert src_root.exists(), '{:} does not exist'.format(src_root)
+  # Imported here so the rest of the script works without these packages.
+  from imgaug import augmenters as iaa
+  import cv2
+  noiser = iaa.SaltAndPepper(p=0.2)
+
+  class_dirs = (entry for entry in src_root.iterdir() if entry.is_dir())
+  for class_dir in class_dirs:
+    out_dir = dst_root / class_dir.name
+    if not out_dir.exists():
+      os.makedirs(str(out_dir))
+    for src_path in class_dir.glob('*.JPEG'):
+      noisy = noiser.augment_image(cv2.imread(str(src_path)))
+      cv2.imwrite(str(out_dir / src_path.name), noisy)
+    print ('{:} done'.format(class_dir))
+
+if __name__ == "__main__":
+  # The main() call is commented out, so running this script only builds
+  # the noisy validation copy.
+  #main()
+  generate_salt_pepper()