update missing data of GDAS

2019-09-28 20:10:23 +10:00
parent 0f46f63a25
commit 180702ab8e
22 changed files with 122858 additions and 0 deletions
--- a/others/GDAS/data/GDAS.pdf
+++ b/others/GDAS/data/GDAS.pdf
--- a/others/GDAS/data/GDAS.png
+++ b/others/GDAS/data/GDAS.png
--- a/others/GDAS/data/Get-PTB-WT2.sh
+++ b/others/GDAS/data/Get-PTB-WT2.sh
@@ -0,0 +1,49 @@
 # https://github.com/salesforce/awd-lstm-lm
 # Download WikiText-2, WikiText-103 and Penn Treebank (word + character level)
 # into ./data and rename every split to train.txt / valid.txt / test.txt.
 # Each `cd` aborts the script on failure so that the following `mv` / `rm -rf`
 # commands can never run in an unexpected directory.
 echo "=== Acquiring datasets ==="
 echo "---"
 mkdir -p save
 mkdir -p data
 cd data || exit 1
 echo "- Downloading WikiText-2 (WT2)"
 wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
 unzip -q wikitext-2-v1.zip
 cd wikitext-2 || exit 1
 mv wiki.train.tokens train.txt
 mv wiki.valid.tokens valid.txt
 mv wiki.test.tokens test.txt
 cd ..
 # The banner used to say "(WT2)" here although this is the WikiText-103 corpus.
 echo "- Downloading WikiText-103 (WT103)"
 wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
 unzip -q wikitext-103-v1.zip
 cd wikitext-103 || exit 1
 mv wiki.train.tokens train.txt
 mv wiki.valid.tokens valid.txt
 mv wiki.test.tokens test.txt
 cd ..
 echo "- Downloading Penn Treebank (PTB)"
 wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
 tar -xzf simple-examples.tgz
 mkdir -p penn
 cd penn || exit 1
 mv ../simple-examples/data/ptb.train.txt train.txt
 mv ../simple-examples/data/ptb.test.txt test.txt
 mv ../simple-examples/data/ptb.valid.txt valid.txt
 cd ..
 echo "- Downloading Penn Treebank (Character)"
 mkdir -p pennchar
 cd pennchar || exit 1
 mv ../simple-examples/data/ptb.char.train.txt train.txt
 mv ../simple-examples/data/ptb.char.test.txt test.txt
 mv ../simple-examples/data/ptb.char.valid.txt valid.txt
 cd ..
 # The extracted tarball tree is no longer needed once the splits are moved.
 rm -rf simple-examples/
 echo "---"
 echo "Happy language modeling :)"
--- a/others/GDAS/data/ImageNet-100.txt
+++ b/others/GDAS/data/ImageNet-100.txt
@@ -0,0 +1,100 @@
 n01532829
 n01560419
 n01580077
 n01614925
 n01664065
 n01751748
 n01871265
 n01924916
 n02087394
 n02091134
 n02091244
 n02094433
 n02097209
 n02102040
 n02102480
 n02105251
 n02106662
 n02108422
 n02108551
 n02123597
 n02165105
 n02190166
 n02268853
 n02279972
 n02408429
 n02412080
 n02443114
 n02488702
 n02509815
 n02606052
 n02701002
 n02782093
 n02794156
 n02802426
 n02804414
 n02808440
 n02906734
 n02917067
 n02950826
 n02963159
 n03017168
 n03042490
 n03045698
 n03063689
 n03065424
 n03100240
 n03109150
 n03124170
 n03131574
 n03272562
 n03345487
 n03443371
 n03461385
 n03527444
 n03690938
 n03692522
 n03721384
 n03729826
 n03792782
 n03838899
 n03843555
 n03874293
 n03877472
 n03877845
 n03908618
 n03929660
 n03930630
 n03933933
 n03970156
 n03976657
 n03982430
 n04004767
 n04065272
 n04141975
 n04146614
 n04152593
 n04192698
 n04200800
 n04204347
 n04317175
 n04326547
 n04344873
 n04370456
 n04389033
 n04501370
 n04515003
 n04542943
 n04554684
 n04562935
 n04596742
 n04597913
 n04606251
 n07583066
 n07718472
 n07734744
 n07873807
 n07880968
 n09229709
 n12768682
 n12998815
--- a/others/GDAS/data/README.md
+++ b/others/GDAS/data/README.md
@@ -0,0 +1,15 @@
 # ImageNet
 The class names of ImageNet-1K are in `classes.txt`.
 # A 100-class subset of ImageNet-1K : ImageNet-100
 The class names of ImageNet-100 are in `ImageNet-100.txt`.
 Running `python split-imagenet.py` will automatically create ImageNet-100 from the ImageNet-1K data. By default, the ImageNet-1K data is assumed to be located at `~/.torch/ILSVRC2012`. If your data is in a different location, you need to modify line-19 and line-20 in `split-imagenet.py`.
 # Tiny-ImageNet
 The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny ImageNet for training.
 # PTB and WT2
 Run `bash Get-PTB-WT2.sh` to download the data.
--- a/others/GDAS/data/classes.txt
+++ b/others/GDAS/data/classes.txt
--- a/others/GDAS/data/compress.py
+++ b/others/GDAS/data/compress.py
@@ -0,0 +1,38 @@
 # python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar
 # python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip
 import os, sys
 from pathlib import Path
 def command(prefix, cmd):
  """Echo ``cmd`` (preceded by ``prefix``) and then run it through the shell.

  The exit status returned by ``os.system`` is discarded; nothing is returned.
  """
  banner = '{:}{:}'.format(prefix, cmd)
  print (banner)
  os.system(cmd)
 def main(source, destination, xtype):
  """Compress an ILSVRC2012 directory tree into per-class archives.

  ``source`` must contain ``train/`` (with exactly 1000 ``n*`` class
  sub-directories) and ``val/``. ``destination`` is wiped and recreated; it
  receives ``val.<xtype>`` plus one ``train/<class>.<xtype>`` per class.

  Args:
    source: ``pathlib.Path`` of the extracted dataset.
    destination: ``pathlib.Path`` of the output directory (deleted first!).
    xtype: archive format, either ``'tar'`` or ``'zip'``.

  Raises:
    AssertionError: if the source layout is not as expected.
    ValueError: if ``xtype`` is neither ``'tar'`` nor ``'zip'``.
  """
  # Function-scope imports keep this block self-contained.
  import shlex
  import shutil
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train').exists(), '{:}/train does not exist'.format(source)
  assert (source/'val'  ).exists(), '{:}/val   does not exist'.format(source)
  # Validate every input *before* the destructive wipe below, so that a typo
  # in ``xtype`` or an incomplete source never costs an existing destination.
  if xtype not in ('tar', 'zip'):
   raise ValueError('invalid compress type : {:}'.format(xtype))
  source      = source.resolve()
  destination = destination.resolve()
  subdirs = list( (source / 'train').glob('n*') )
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  # Start from an empty destination. shutil.rmtree is used instead of a
  # string-built ``rm -rf`` so paths with spaces/metacharacters are safe.
  if destination.exists():
   shutil.rmtree(str(destination))
  (destination/'train').mkdir(parents=True, exist_ok=True)
  def quote(path):
   # Shell-escape a path before it is interpolated into a command line.
   return shlex.quote(str(path))
  if xtype == 'tar':
   command('', 'tar -cf {:} -C {:} val'.format(quote(destination/'val.tar'), quote(source)))
  else:
   command('', '(cd {:} ; zip -r {:} val)'.format(quote(source), quote(destination/'val.zip')))
  total = len(subdirs)
  for idx, subdir in enumerate(subdirs):
   name = subdir.name
   prefix = '{:03d}/{:03d}-th: '.format(idx, total)
   archive = destination/'train'/'{:}.{:}'.format(name, xtype)
   if xtype == 'tar':
    command(prefix, 'tar -cf {:} -C {:} {:}'.format(quote(archive), quote(source / 'train'), quote(name)))
   else:
    command(prefix, '(cd {:}; zip -r {:} {:})'.format(quote(source / 'train'), quote(archive), quote(name)))
 if __name__ == '__main__':
  # Usage: compress.py <source-dir> <destination-dir> <tar|zip>
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source = Path(sys.argv[1])
  destination = Path(sys.argv[2])
  main(source, destination, sys.argv[3])
--- a/others/GDAS/data/data/penn/test.txt
+++ b/others/GDAS/data/data/penn/test.txt
--- a/others/GDAS/data/data/penn/train.txt
+++ b/others/GDAS/data/data/penn/train.txt
--- a/others/GDAS/data/data/penn/valid.txt
+++ b/others/GDAS/data/data/penn/valid.txt
--- a/others/GDAS/data/data/wikitext-2/test.txt
+++ b/others/GDAS/data/data/wikitext-2/test.txt
--- a/others/GDAS/data/data/wikitext-2/train.txt
+++ b/others/GDAS/data/data/wikitext-2/train.txt
--- a/others/GDAS/data/data/wikitext-2/valid.txt
+++ b/others/GDAS/data/data/wikitext-2/valid.txt
--- a/others/GDAS/data/decompress.py
+++ b/others/GDAS/data/decompress.py
@@ -0,0 +1,94 @@
 # python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar
 # python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip
 import os, gc, sys
 from pathlib import Path
 import multiprocessing
 def execute(cmds, idx, num):
  """Run the commands of ``cmds`` whose position modulo ``num`` equals ``idx``.

  Each selected command is printed with its position and then handed to
  ``os.system``; exit codes are ignored.
  """
  total = len(cmds)
  for i, cmd in enumerate(cmds):
    if i % num != idx:
      continue
    print ('{:03d} :: {:03d} :: {:03d}/{:03d} : {:}'.format(idx, num, i, total, cmd))
    os.system(cmd)
 def command(prefix, cmd):
  """Build (but do not run) a sub-shell command line around ``cmd``.

  The returned string echoes ``prefix``, a timestamp and ``"PID:"$$``, then
  runs ``cmd`` and finally sleeps for 0.1s.
  """
  template = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'
  return template.format(prefix, cmd)
 def mkILSVRC2012(destination):
  """Recreate ``destination`` as an empty directory that only holds ``train/``.

  Any existing content of ``destination`` is deleted. The removal goes through
  ``shutil.rmtree`` rather than a string-built ``rm -rf`` so that paths
  containing spaces or shell metacharacters are handled correctly.

  Args:
    destination: ``pathlib.Path`` of the directory to (re)create.
  """
  import shutil  # function-scope import keeps this block self-contained
  destination = destination.resolve()
  if destination.exists():
    shutil.rmtree(str(destination))
  (destination/'train').mkdir(parents=True, exist_ok=True)
 def main(source, destination, xtype):
  """Print the shell commands that unpack a compressed ILSVRC2012 tree.

  ``source`` must contain ``val.<xtype>`` and a ``train/`` directory with
  exactly 1000 ``n*`` archives. ``destination`` is wiped and recreated via
  ``mkILSVRC2012``. The extraction commands are only printed, not executed
  (the original in-process / multi-process execution was disabled).

  Args:
    source: ``pathlib.Path`` of the directory holding the archives.
    destination: ``pathlib.Path`` of the output directory (deleted first!).
    xtype: archive format, either ``'tar'`` or ``'zip'``.

  Raises:
    AssertionError: if the source layout is not as expected.
    ValueError: if ``xtype`` is neither ``'tar'`` nor ``'zip'``.
  """
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source)
  if xtype == 'tar'  : assert (source/'val.tar').exists(), '{:}/val.tar does not exist'.format(source)
  elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val.zip does not exist'.format(source)
  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
  source      = source.resolve()
  subdirs = list( (source / 'train').glob('n*') )
  # Check the source is complete *before* the destination is wiped.
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  mkILSVRC2012(destination)
  # xtype was validated above, so only two command shapes are possible.
  if xtype == 'tar': extract = 'tar -xf {archive:} -C {target:}'
  else             : extract = 'unzip -qd {target:} {archive:}'
  all_commands = []
  for idx, subdir in enumerate(subdirs):
    prefix = '{:03d}/{:03d}-th: '.format(idx, len(subdirs))
    all_commands.append( command(prefix, extract.format(archive=subdir, target=destination / 'train')) )
  # The validation archive is unpacked last, directly into destination.
  all_commands.append( command('', extract.format(archive=source/'val.{:}'.format(xtype), target=destination)) )
  for cmd in all_commands:
    print(cmd)
 if __name__ == '__main__':
  # Usage: decompress.py <source> <destination> <tar|zip|wget>
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  mode = sys.argv[3]
  if mode != 'wget':
    main(source, destination, mode)
  else:
    # In 'wget' mode <source> is a text file with one class name per line.
    with open(source) as f:
      content = [x.strip() for x in f.readlines()]
    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
    mkILSVRC2012(destination)
    train_dir = destination / 'train'
    val_tar = destination / 'val.tar'
    fetch_val = 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, val_tar, destination, val_tar)
    all_commands = [command('make-val', fetch_val)]
    for idx, name in enumerate(content):
      class_stem = train_dir / name
      fetch_cls = 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, train_dir, class_stem, train_dir, class_stem)
      all_commands.append(command('{:03d}/{:03d}-th: '.format(idx, len(content)), fetch_cls))
    # As in tar/zip mode, the commands are only printed, never executed here.
    for cmd in all_commands:
      print(cmd)
--- a/others/GDAS/data/imagenet-results.png
+++ b/others/GDAS/data/imagenet-results.png
--- a/others/GDAS/data/load_data_CUHK-PEDES.py
+++ b/others/GDAS/data/load_data_CUHK-PEDES.py
@@ -0,0 +1,15 @@
 import json
 def main():
  """Load ``caption_all.json`` from the working directory and print a summary.

  Prints the number of entries, checks that every entry has at least one
  caption, and prints the min / max / count of the distinct ``id`` values.
  """
  xpath = 'caption_all.json'
  with open(xpath, 'r') as cfile:
    cap_data = json.load(cfile)
  print ('There are {:} images'.format( len(cap_data) ))
  IDs = set()
  for idx, data in enumerate( cap_data ):
    IDs.add( data['id'] )
    captions = data['captions']
    assert len( captions ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, captions, len(captions))
  lowest, highest = min(IDs), max(IDs)
  print ('IDs :: min={:}, max={:}, num={:}'.format(lowest, highest, len(IDs)))
 if __name__ == '__main__':
  main()
--- a/others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
+++ b/others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
--- a/others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
+++ b/others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
--- a/others/GDAS/data/ps_mem.py
+++ b/others/GDAS/data/ps_mem.py
@@ -0,0 +1,661 @@
 #!/usr/bin/env python
 # Try to determine how much RAM is currently being used per program.
 # Note per _program_, not per process. So for example this script
 # will report RAM used by all httpd process together. In detail it reports:
 # sum(private RAM for program processes) + sum(Shared RAM for program processes)
 # The shared RAM is problematic to calculate, and this script automatically
 # selects the most accurate method available for your kernel.
 # Licence: LGPLv2
 # Author:  P@draigBrady.com
 # Source:  http://www.pixelbeat.org/scripts/ps_mem.py
 # V1.0      06 Jul 2005     Initial release
 # V1.1      11 Aug 2006     root permission required for accuracy
 # V1.2      08 Nov 2006     Add total to output
 #                           Use KiB,MiB,... for units rather than K,M,...
 # V1.3      22 Nov 2006     Ignore shared col from /proc/$pid/statm for
 #                           2.6 kernels up to and including 2.6.9.
 #                           There it represented the total file backed extent
 # V1.4      23 Nov 2006     Remove total from output as it's meaningless
 #                           (the shared values overlap with other programs).
 #                           Display the shared column. This extra info is
 #                           useful, especially as it overlaps between programs.
 # V1.5      26 Mar 2007     Remove redundant recursion from human()
 # V1.6      05 Jun 2007     Also report number of processes with a given name.
 #                           Patch from riccardo.murri@gmail.com
 # V1.7      20 Sep 2007     Use PSS from /proc/$pid/smaps if available, which
 #                           fixes some over-estimation and allows totalling.
 #                           Enumerate the PIDs directly rather than using ps,
 #                           which fixes the possible race between reading
 #                           RSS with ps, and shared memory with this program.
 #                           Also we can show non truncated command names.
 # V1.8      28 Sep 2007     More accurate matching for stats in /proc/$pid/smaps
 #                           as otherwise could match libraries causing a crash.
 #                           Patch from patrice.bouchand.fedora@gmail.com
 # V1.9      20 Feb 2008     Fix invalid values reported when PSS is available.
 #                           Reported by Andrey Borzenkov <arvidjaar@mail.ru>
 # V3.13     17 Sep 2018
 #   http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py
 # Notes:
 #
 # All interpreted programs where the interpreter is started
 # by the shell or with env, will be merged to the interpreter
 # (as that's what's given to exec). For e.g. all python programs
 # starting with "#!/usr/bin/env python" will be grouped under python.
 # You can change this by using the full command line but that will
 # have the undesirable effect of splitting up programs started with
 # differing parameters (for e.g. mingetty tty[1-6]).
 #
 # For 2.6 kernels up to and including 2.6.13 and later 2.4 redhat kernels
 # (rmap vm without smaps) it can not be accurately determined how many pages
 # are shared between processes in general or within a program in our case:
 # http://lkml.org/lkml/2005/7/6/250
 # A warning is printed if overestimation is possible.
 # In addition for 2.6 kernels up to 2.6.9 inclusive, the shared
 # value in /proc/$pid/statm is the total file-backed extent of a process.
 # We ignore that, introducing more overestimation, again printing a warning.
 # Since kernel 2.6.23-rc8-mm1 PSS is available in smaps, which allows
 # us to calculate a more accurate value for the total RAM used by programs.
 #
 # Programs that use CLONE_VM without CLONE_THREAD are discounted by assuming
 # they're the only programs that have the same /proc/$PID/smaps file for
 # each instance.  This will fail if there are multiple real instances of a
 # program that then use CLONE_VM without CLONE_THREAD, or if a clone changes
 # its memory map while we're checksumming each /proc/$PID/smaps.
 #
 # I don't take account of memory allocated for a program
 # by other programs. For e.g. memory used in the X server for
 # a program could be determined, but is not.
 #
 # FreeBSD is supported if linprocfs is mounted at /compat/linux/proc/
 # FreeBSD 8.0 supports up to a level of Linux 2.6.16
 import getopt
 import time
 import errno
 import os
 import sys
 # The following exits cleanly on Ctrl-C or EPIPE
 # while treating other exceptions as before.
 def std_exceptions(etype, value, tb):
    """Exception hook that stays silent on Ctrl-C and on a broken pipe.

    The default hook is restored first; any exception other than
    KeyboardInterrupt or an IOError with errno EPIPE is forwarded to it.
    """
    sys.excepthook = sys.__excepthook__
    if issubclass(etype, KeyboardInterrupt):
        return
    if issubclass(etype, IOError) and value.errno == errno.EPIPE:
        return
    sys.__excepthook__(etype, value, tb)
 # Install the quiet hook at import time (Ctrl-C / EPIPE exit without traceback).
 sys.excepthook = std_exceptions
 #
 #   Define some global variables
 #
 # Size of one memory page, expressed in KiB (a float under true division).
 PAGESIZE = os.sysconf("SC_PAGE_SIZE") / 1024 #KiB
 # PID of this script, used by get_memory_usage() to filter itself in or out.
 our_pid = os.getpid()
 # Flags flipped to 1 by getMemStats() once a "Pss" / "SwapPss:" line is seen.
 have_pss = 0
 have_swap_pss = 0
 class Unbuffered(object):
    """Stream wrapper that flushes the wrapped stream after every write."""
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        target = self.stream
        target.write(data)
        target.flush()
    def close(self):
        self.stream.close()
    def flush(self):
        self.stream.flush()
 class Proc:
    """Build paths below the procfs mount point and open files there."""
    def __init__(self):
        # FreeBSD exposes the Linux-compatible procfs under /compat/linux.
        if os.uname()[0] == "FreeBSD":
            self.proc = '/compat/linux/proc'
        else:
            self.proc = '/proc'
    def path(self, *args):
        """Join the procfs root with the stringified ``args``."""
        parts = [str(a) for a in args]
        return os.path.join(self.proc, *parts)
    def open(self, *args):
        """Open the procfs file named by ``args`` for reading.

        Raises LookupError when the file is missing or not accessible
        (ENOENT, EPERM, EACCES); other I/O errors propagate unchanged.
        """
        try:
            if sys.version_info >= (3,):
                return open(self.path(*args), errors='ignore')
            return open(self.path(*args))
        except (IOError, OSError):
            val = sys.exc_info()[1]
            # ENOENT: kernel thread or process gone
            if val.errno in (errno.ENOENT, errno.EPERM, errno.EACCES):
                raise LookupError
            raise
 proc = Proc()
 #
 #   Functions
 #
 def parse_options():
    """Parse ``sys.argv[1:]`` into the option tuple consumed by ``main``.

    Returns:
        ``(split_args, pids_to_show, watch, only_total, discriminate_by_pid,
        show_swap)`` where ``pids_to_show`` is a list of ints or None and
        ``watch`` is an int number of seconds or None.

    Exits with status 3 (after writing the usage text to stderr) on an unknown
    option, on extraneous arguments or on a non-integer ``-p`` / ``-w`` value,
    and with status 0 after handling ``--help`` or ``--version``.
    """
    long_options = [
        'split-args',
        'help',
        'version',
        'total',
        'discriminate-by-pid',
        'swap'
    ]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "shtdSp:w:", long_options)
    except getopt.GetoptError:
        sys.stderr.write(help())
        sys.exit(3)
    if len(args):
        sys.stderr.write("Extraneous arguments: %s\n" % args)
        sys.exit(3)
    # ps_mem.py options
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False
    show_swap = False
    watch = None
    only_total = False
    for o, a in opts:
        if o in ('-s', '--split-args'):
            split_args = True
        if o in ('-t', '--total'):
            only_total = True
        if o in ('-d', '--discriminate-by-pid'):
            discriminate_by_pid = True
        if o in ('-S', '--swap'):
            show_swap = True
        if o in ('-h', '--help'):
            sys.stdout.write(help())
            sys.exit(0)
        # A one-element tuple: ('--version') without the comma is a plain
        # string, which would turn this into a substring test.
        if o in ('--version',):
            sys.stdout.write('3.13'+'\n')
            sys.exit(0)
        if o in ('-p',):
            try:
                pids_to_show = [int(x) for x in a.split(',')]
            except ValueError:
                sys.stderr.write(help())
                sys.exit(3)
        if o in ('-w',):
            try:
                watch = int(a)
            except ValueError:
                sys.stderr.write(help())
                sys.exit(3)
    return (
        split_args,
        pids_to_show,
        watch,
        only_total,
        discriminate_by_pid,
        show_swap
    )
 def help():
    """Return the usage text shown for ``-h`` / ``--help`` and on bad options."""
    # The long form is spelled "--help" (that is what parse_options accepts);
    # the text previously advertised a non-existent "-help".
    help_msg = 'Usage: ps_mem [OPTION]...\n' \
        'Show program core memory usage\n' \
        '\n' \
        '  -h, --help                  Show this help\n' \
        '  -p <pid>[,pid2,...pidN]     Only show memory usage PIDs in the '\
        'specified list\n' \
        '  -s, --split-args            Show and separate by, all command line'\
        ' arguments\n' \
        '  -t, --total                 Show only the total value\n' \
        '  -d, --discriminate-by-pid   Show by process rather than by program\n' \
        '  -S, --swap                  Show swap information\n' \
        '  -w <N>                      Measure and show process memory every'\
        ' N seconds\n'
    return help_msg
 # (major,minor,release)
 def kernel_ver():
    """Return the kernel version as an ``(major, minor, release)`` int tuple.

    The version is read from ``sys/kernel/osrelease`` below the procfs root.
    A missing third component defaults to 0. For the minor and release
    components, anything after a ``-`` or ``_`` is dropped and a non-numeric
    remainder becomes 0. The major component is converted as-is.
    """
    osrelease = proc.open('sys/kernel/osrelease')
    try:
        kv = osrelease.readline().split(".")[:3]
    finally:
        # Close explicitly instead of leaking the handle until GC.
        osrelease.close()
    last = len(kv)
    if last == 2:
        kv.append('0')
    last -= 1
    while last > 0:
        for char in "-_":
            kv[last] = kv[last].split(char)[0]
        try:
            int(kv[last])
        except ValueError:
            kv[last] = 0
        last -= 1
    return (int(kv[0]), int(kv[1]), int(kv[2]))
 #return Private,Shared,Swap(Pss),unique_id
 #Note shared is always a subset of rss (trs is not always)
 def getMemStats(pid):
    """Return ``(Private, Shared, Swap, mem_id)`` for process ``pid``.

    The figures come from ``smaps`` (or ``smaps_rollup`` when present) if that
    file exists for the process, otherwise from ``statm``. statm-derived
    values are page counts multiplied by PAGESIZE (KiB); smaps values are the
    second field of the matching lines (presumably kB - confirm against the
    kernel's smaps format).

    Side effect: the module-level flags ``have_pss`` / ``have_swap_pss`` are
    set to 1 once a ``Pss`` / ``SwapPss:`` line has been seen.

    ``mem_id`` is the hash of the smaps text when smaps is read, and the pid
    itself otherwise.
    """
    global have_pss
    global have_swap_pss
    mem_id = pid #unique
    Private_lines = []
    Shared_lines = []
    Pss_lines = []
    # Second statm field times the page size.
    Rss = (int(proc.open(pid, 'statm').readline().split()[1])
           * PAGESIZE)
    Swap_lines = []
    Swap_pss_lines = []
    Swap = 0
    if os.path.exists(proc.path(pid, 'smaps')):  # stat
        smaps = 'smaps'
        if os.path.exists(proc.path(pid, 'smaps_rollup')):
            smaps = 'smaps_rollup' # faster to process
        lines = proc.open(pid, smaps).readlines()  # open
        # Note we checksum smaps as maps is usually but
        # not always different for separate processes.
        mem_id = hash(''.join(lines))
        # Bucket the lines by their leading keyword.
        for line in lines:
            if line.startswith("Shared"):
                Shared_lines.append(line)
            elif line.startswith("Private"):
                Private_lines.append(line)
            elif line.startswith("Pss"):
                have_pss = 1
                Pss_lines.append(line)
            elif line.startswith("Swap:"):
                Swap_lines.append(line)
            elif line.startswith("SwapPss:"):
                have_swap_pss = 1
                Swap_pss_lines.append(line)
        Shared = sum([int(line.split()[1]) for line in Shared_lines])
        Private = sum([int(line.split()[1]) for line in Private_lines])
        #Note Shared + Private = Rss above
        #The Rss in smaps includes video card mem etc.
        if have_pss:
            pss_adjust = 0.5 # add 0.5KiB as this avg error due to truncation
            Pss = sum([float(line.split()[1])+pss_adjust for line in Pss_lines])
            # With PSS available, "Shared" becomes this process's share of it.
            Shared = Pss - Private
        if have_swap_pss:
            # The kernel supports SwapPss, that shows proportional swap share.
            # Note that Swap - SwapPss is not Private Swap.
            Swap = sum([int(line.split()[1]) for line in Swap_pss_lines])
        else:
            # Note that Swap = Private swap + Shared swap.
            Swap = sum([int(line.split()[1]) for line in Swap_lines])
    elif (2,6,1) <= kernel_ver() <= (2,6,9):
        Shared = 0 #lots of overestimation, but what can we do?
        Private = Rss
    else:
        # No smaps: fall back to the third statm field for the shared pages.
        Shared = int(proc.open(pid, 'statm').readline().split()[2])
        Shared *= PAGESIZE
        Private = Rss - Shared
    return (Private, Shared, Swap, mem_id)
 def getCmdName(pid, split_args, discriminate_by_pid, exe_only=False):
    """Return the display name used to group process ``pid``.

    With ``split_args`` the whole command line (joined with spaces) is
    returned. Otherwise the name is derived from the basename of the ``exe``
    link and the first line of ``status``; ``exe_only`` returns the exe
    basename directly. With ``discriminate_by_pid`` the result gets a
    `` [pid]`` suffix.

    Raises LookupError when the ``exe`` link cannot be read because of ENOENT,
    EPERM or EACCES; ``proc.open`` can raise it as well.
    """
    cmdline = proc.open(pid, 'cmdline').read().split("\0")
    # Drop the empty element produced by the trailing NUL.
    if cmdline[-1] == '' and len(cmdline) > 1:
        cmdline = cmdline[:-1]
    path = proc.path(pid, 'exe')
    try:
        path = os.readlink(path)
        # Some symlink targets were seen to contain NULs on RHEL 5 at least
        # https://github.com/pixelb/scripts/pull/10, so take string up to NUL
        path = path.split('\0')[0]
    except OSError:
        val = sys.exc_info()[1]
        if (val.errno == errno.ENOENT or # either kernel thread or process gone
            val.errno == errno.EPERM or
            val.errno == errno.EACCES):
            raise LookupError
        raise
    if split_args:
        return ' '.join(cmdline).replace('\n', ' ')
    if path.endswith(" (deleted)"):
        # Strip the 10-character " (deleted)" suffix.
        path = path[:-10]
        if os.path.exists(path):
            path += " [updated]"
        else:
            #The path could have prelink stuff so try cmdline
            #which might have the full path present. This helped for:
            #/usr/libexec/notification-area-applet.#prelink#.fX7LCT (deleted)
            if os.path.exists(cmdline[0]):
                path = cmdline[0] + " [updated]"
            else:
                path += " [deleted]"
    exe = os.path.basename(path)
    if exe_only: return exe
    proc_status = proc.open(pid, 'status').readlines()
    # First status line minus its 6-character prefix and the trailing newline
    # (presumably the "Name:" field - confirm against the procfs format).
    cmd = proc_status[0][6:-1]
    if exe.startswith(cmd):
        cmd = exe #show non truncated version
        #Note because we show the non truncated name
        #one can have separated programs as follows:
        #584.0 KiB +   1.0 MiB =   1.6 MiB    mozilla-thunder (exe -> bash)
        # 56.0 MiB +  22.2 MiB =  78.2 MiB    mozilla-thunderbird-bin
    else:
        #Lookup the parent's exe and use that if matching
        #which will merge "Web Content" with "firefox" for example
        ppid = 0
        # Only the first 10 status lines are searched for the PPid entry.
        for l in range(10):
            ps_line = proc_status[l]
            if ps_line.startswith('PPid:'):
                ppid = int(ps_line[6:-1])
                break
        if ppid:
            p_exe = getCmdName(ppid, False, False, exe_only=True)
            if exe == p_exe:
                cmd = exe
    if sys.version_info >= (3,):
        # Round-trip through bytes so unencodable characters get replaced.
        cmd = cmd.encode(errors='replace').decode()
    if discriminate_by_pid:
        cmd = '%s [%d]' % (cmd, pid)
    return cmd
 #The following matches "du -h" output
 #see also human.py
 def human(num, power="Ki", units=None):
    """Format a size for display.

    Args:
        num: the size, expressed in the unit named by ``power``.
        power: binary prefix of ``num`` ("Ki", "Mi", "Gi" or "Ti").
        units: when given, return ``num * 1024 / units`` rounded to an integer
            string instead of a human readable value.

    Returns:
        A string such as ``"1.5 MiB"``. The value is scaled up by 1024 while
        it has four or more integer digits. Scaling stops at "Ti", so very
        large values are reported as e.g. ``"1024.0 TiB"`` instead of raising
        IndexError as before.
    """
    if units is not None:
        return "%.f" % ((num * 1024) / units)
    powers = ["Ki", "Mi", "Gi", "Ti"]
    while num >= 1000: #4 digits
        following = powers.index(power) + 1
        if following >= len(powers):
            break  # already at the largest known prefix
        num /= 1024.0
        power = powers[following]
    return "%.1f %sB" % (num, power)
 def cmd_with_count(cmd, count):
    """Return ``cmd``, suffixed with `` (count)`` when ``count`` exceeds 1."""
    return "%s (%u)" % (cmd, count) if count > 1 else cmd
 #Warn of possible inaccuracies
 #RAM:
 #2 = accurate & can total
 #1 = accurate only considering each process in isolation
 #0 = some shared mem not reported
 #-1= all shared mem not reported
 #SWAP:
 #2 = accurate & can total
 #1 = accurate only considering each process in isolation
 #-1= not available
 def val_accuracy(show_swap):
    """Return ``(ram_accuracy, swap_accuracy)`` for the running kernel.

    The codes are the ones listed in the comment above this function.
    See http://wiki.apache.org/spamassassin/TopSharedMemoryBug
    """
    kv = kernel_ver()
    pid = os.getpid()
    series = kv[:2]
    if series == (2,4):
        meminfo = proc.open('meminfo').read()
        if "Inact_" not in meminfo:
            return 1, -1
        return 0, -1
    if series == (2,6):
        if os.path.exists(proc.path(pid, 'smaps')):
            if "Pss:" in proc.open(pid, 'smaps').read():
                return 2, 1
            return 1, 1
        if (2,6,1) <= kv <= (2,6,9):
            return -1, -1
        return 0, -1
    if kv[0] > 2 and os.path.exists(proc.path(pid, 'smaps')):
        swap_accuracy = 1
        # The smaps file is only scanned for SwapPss when swap was requested.
        if show_swap and "SwapPss:" in proc.open(pid, 'smaps').read():
            swap_accuracy = 2
        return 2, swap_accuracy
    return 1, -1
 def show_val_accuracy( ram_inacc, swap_inacc, only_total, show_swap ):
    """Write accuracy warnings to stderr, close stderr, and maybe exit.

    The messages are labelled "Error" when ``only_total`` is set and
    "Warning" otherwise. With ``only_total`` the process exits with status 1
    unless the relevant accuracy code (swap when ``show_swap``, RAM
    otherwise) equals 2.
    """
    level = ("Warning","Error")[only_total]
    # Only show significant warnings
    if not show_swap:
        swap_inacc = 2
    elif only_total:
        ram_inacc = 2
    ram_messages = {
        -1: (
            "%s: Shared memory is not reported by this system.\n" % level,
            "Values reported will be too large, and totals are not reported\n",
        ),
        0: (
            "%s: Shared memory is not reported accurately by this system.\n" % level,
            "Values reported could be too large, and totals are not reported\n",
        ),
        1: (
            "%s: Shared memory is slightly over-estimated by this system\n"
            "for each program, so totals are not reported.\n" % level,
        ),
    }
    swap_messages = {
        -1: (
            "%s: Swap is not reported by this system.\n" % level,
        ),
        1: (
            "%s: Swap is over-estimated by this system for each program,\n"
            "so totals are not reported.\n" % level,
        ),
    }
    for text in ram_messages.get(ram_inacc, ()):
        sys.stderr.write(text)
    for text in swap_messages.get(swap_inacc, ()):
        sys.stderr.write(text)
    sys.stderr.close()
    if not only_total:
        return
    accuracy = swap_inacc if show_swap else ram_inacc
    if accuracy != 2:
        sys.exit(1)
 def get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                      include_self=False, only_self=False):
    """Collect per-program memory usage from the numeric entries of procfs.

    Args:
        pids_to_show: list of PIDs to restrict the scan to, or None for all.
        split_args: passed to getCmdName (group by full command line).
        discriminate_by_pid: passed to getCmdName (one entry per process).
        include_self: also account for this script's own process.
        only_self: account for this script's own process only.

    Returns:
        ``(sorted_cmds, shareds, count, total, swaps, total_swap)`` where
        ``sorted_cmds`` is a list of ``(name, private + shared)`` pairs in
        ascending order with zero-sized entries removed, ``shareds``,
        ``count`` and ``swaps`` are dicts keyed by program name, and
        ``total`` / ``total_swap`` are the sums over all programs.
    """
    cmds = {}
    shareds = {}
    mem_ids = {}
    count = {}
    swaps = {}
    for pid in os.listdir(proc.path('')):
        if not pid.isdigit():
            continue
        pid = int(pid)
        # Some filters
        if only_self and pid != our_pid:
            continue
        if pid == our_pid and not include_self:
            continue
        if pids_to_show is not None and pid not in pids_to_show:
            continue
        try:
            cmd = getCmdName(pid, split_args, discriminate_by_pid)
        except LookupError:
            #operation not permitted
            #kernel threads don't have exe links or
            #process gone
            continue
        try:
            private, shared, swap, mem_id = getMemStats(pid)
        except (RuntimeError, LookupError):
            # Process gone: getMemStats reads through proc.open, which reports
            # a vanished or inaccessible process as LookupError. Catching only
            # RuntimeError let a process exiting mid-scan abort the whole run.
            continue
        if shareds.get(cmd):
            if have_pss: #add shared portion of PSS together
                shareds[cmd] += shared
            elif shareds[cmd] < shared: #just take largest shared val
                shareds[cmd] = shared
        else:
            shareds[cmd] = shared
        cmds[cmd] = cmds.setdefault(cmd, 0) + private
        count[cmd] = count.get(cmd, 0) + 1
        # Remember the distinct memory ids seen for this program.
        mem_ids.setdefault(cmd, {}).update({mem_id: None})
        # Swap (overcounting for now...)
        swaps[cmd] = swaps.setdefault(cmd, 0) + swap
    # Total swapped mem for each program
    total_swap = 0
    # Add shared mem for each program
    total = 0
    for cmd in cmds:
        cmd_count = count[cmd]
        if len(mem_ids[cmd]) == 1 and cmd_count > 1:
            # Assume this program is using CLONE_VM without CLONE_THREAD
            # so only account for one of the processes
            cmds[cmd] /= cmd_count
            if have_pss:
                shareds[cmd] /= cmd_count
        cmds[cmd] = cmds[cmd] + shareds[cmd]
        total += cmds[cmd]  # valid if PSS available
        total_swap += swaps[cmd]
    sorted_cmds = sorted(cmds.items(), key=lambda x:x[1])
    sorted_cmds = [x for x in sorted_cmds if x[1]]
    return sorted_cmds, shareds, count, total, swaps, total_swap
 def print_header(show_swap, discriminate_by_pid):
    """Write the column header line (followed by a blank line) to stdout."""
    pieces = [" Private  +   Shared  =  RAM used"]
    if show_swap:
        pieces.append("   Swap used")
    pieces.append("\tProgram")
    if discriminate_by_pid:
        pieces.append("[pid]")
    pieces.append("\n\n")
    sys.stdout.write("".join(pieces))
 def print_memory_usage(sorted_cmds, shareds, count, total, swaps, total_swap,
                        show_swap):
    """Write one row per program to stdout, then a totals footer if valid.

    Each row shows private, shared and combined RAM (plus swap when
    ``show_swap``) followed by the program name and its process count. The
    footer is only written when the PSS-based flags say totals are
    meaningful.
    """
    for entry in sorted_cmds:
        name = entry[0]
        ram = entry[1]
        row_format = "%9s + %9s = %9s"
        fields = [human(ram-shareds[name]), human(shareds[name]), human(ram)]
        if show_swap:
            row_format += "   %9s"
            fields.append(human(swaps[name]))
        row_format += "\t%s\n"
        fields.append(cmd_with_count(name, count[name]))
        sys.stdout.write(row_format % tuple(fields))
    # Only show totals if appropriate
    if have_swap_pss and show_swap:  # kernel will have_pss
        footer = ("-" * 45, " " * 24, human(total), " " * 3,
                  human(total_swap), "=" * 45)
        sys.stdout.write("%s\n%s%9s%s%9s\n%s\n" % footer)
    elif have_pss:
        footer = ("-" * 33, " " * 24, human(total), "=" * 33)
        sys.stdout.write("%s\n%s%9s\n%s\n" % footer)
 def verify_environment(pids_to_show):
    """Abort early when the script cannot work in this environment.

    Exits with status 1 when not running as root and no PIDs were requested,
    and with status 2 when the kernel version cannot be read from procfs.

    Args:
        pids_to_show: list of PIDs given with ``-p``, or None.
    """
    if os.geteuid() != 0 and not pids_to_show:
        sys.stderr.write("Sorry, root permission required, or specify pids with -p\n")
        sys.stderr.close()
        sys.exit(1)
    unsupported = (
        "Couldn't access " + proc.path('') + "\n"
        "Only GNU/Linux and FreeBSD (with linprocfs) are supported\n")
    try:
        kernel_ver()
    except LookupError:
        # Proc.open reports a missing or inaccessible procfs file as
        # LookupError, so the ENOENT branch below is never reached through
        # it. Without this handler a missing procfs gives a traceback.
        val = sys.exc_info()[1]
        if isinstance(val, (IndexError, KeyError)):
            raise  # a parsing problem, not a procfs access problem
        sys.stderr.write(unsupported)
        sys.exit(2)
    except (IOError, OSError):
        val = sys.exc_info()[1]
        if val.errno == errno.ENOENT:
            sys.stderr.write(unsupported)
            sys.exit(2)
        else:
            raise
 def main():
    """Entry point: parse options, report memory usage, then print warnings.

    Without ``-w`` a single report is produced. With ``-w N`` the report is
    repeated every N seconds for as long as get_memory_usage() returns
    entries. Accuracy warnings are written after stdout has been closed.
    """
    # Force the stdout and stderr streams to be unbuffered
    sys.stdout = Unbuffered(sys.stdout)
    sys.stderr = Unbuffered(sys.stderr)
    split_args, pids_to_show, watch, only_total, discriminate_by_pid, \
    show_swap = parse_options()
    verify_environment(pids_to_show)
    if not only_total:
        print_header(show_swap, discriminate_by_pid)
    if watch is not None:
        try:
            # Seed value so the loop body runs at least once.
            sorted_cmds = True
            while sorted_cmds:
                sorted_cmds, shareds, count, total, swaps, total_swap = \
                    get_memory_usage(pids_to_show, split_args,
                                     discriminate_by_pid)
                if only_total and show_swap and have_swap_pss:
                    sys.stdout.write(human(total_swap, units=1)+'\n')
                elif only_total and not show_swap and have_pss:
                    sys.stdout.write(human(total, units=1)+'\n')
                elif not only_total:
                    print_memory_usage(sorted_cmds, shareds, count, total,
                                       swaps, total_swap, show_swap)
                sys.stdout.flush()
                time.sleep(watch)
            else:
                # Reached when the loop ends because nothing was left to report.
                sys.stdout.write('Process does not exist anymore.\n')
        except KeyboardInterrupt:
            pass
    else:
        # This is the default behavior
        sorted_cmds, shareds, count, total, swaps, total_swap = \
            get_memory_usage(pids_to_show, split_args,
                             discriminate_by_pid)
        if only_total and show_swap and have_swap_pss:
            sys.stdout.write(human(total_swap, units=1)+'\n')
        elif only_total and not show_swap and have_pss:
            sys.stdout.write(human(total, units=1)+'\n')
        elif not only_total:
            print_memory_usage(sorted_cmds, shareds, count, total, swaps,
                               total_swap, show_swap)
    # We must close explicitly, so that any EPIPE exception
    # is handled by our excepthook, rather than the default
    # one which is reenabled after this script finishes.
    sys.stdout.close()
    # show_val_accuracy() may terminate the process via sys.exit(1).
    ram_accuracy, swap_accuracy = val_accuracy( show_swap )
    show_val_accuracy( ram_accuracy, swap_accuracy, only_total, show_swap )
 if __name__ == '__main__':
    main()
--- a/others/GDAS/data/show-queue.sh
+++ b/others/GDAS/data/show-queue.sh
@@ -0,0 +1,35 @@
 #!/bin/bash
 # Print the job queues of several clusters, grouped into three sections.
 # Every queue is queried with `showjob -p`, followed by a 0.3s pause.

 # show_section TITLE QUEUE...
 #   Print TITLE between two separator lines, then query each QUEUE in order.
 show_section() {
  local title="$1"
  shift
  echo '-------------------------------'
  echo "${title}"
  echo '-------------------------------'
  local queue
  for queue in "$@"
  do
    showjob -p "${queue}"
    sleep 0.3s
  done
 }

 # Show High-priority
 show_section 'Queue in high-priority clusters' \
  yq01-v100-box-1-8 yq01-v100-box-idl-2-8

 # Disabled alternative list that additionally contains yq01-p40-2-8:
 #queues="yq01-p40-3-8 yq01-p40-2-8 yq01-p40-box-1-8 yq01-v100-box-2-8"
 show_section 'Queue in low-priority clusters' \
  yq01-p40-3-8 yq01-p40-box-1-8 yq01-v100-box-2-8

 show_section 'Queue for other IDL teams' \
  yq01-v100-box-idl-8 yq01-v100-box-idl-3-8
--- a/others/GDAS/data/split-imagenet.py
+++ b/others/GDAS/data/split-imagenet.py
@@ -0,0 +1,37 @@
 import os, sys, random
 from pathlib import Path
 def sample_100_cls():
  """Draw a reproducible 100-class subset and save it to 'ImageNet-100.txt'.

  Reads one class name per line from 'classes.txt' in the current working
  directory, seeds the global `random` generator with 111, samples 100
  names, and writes them in sorted order (one per line) to
  'ImageNet-100.txt'. Prints a line of 100 dashes when done.

  Raises:
    ValueError: propagated from random.sample when 'classes.txt' holds
      fewer than 100 lines.
  """
  with open('classes.txt') as src:
    names = [line.strip() for line in src]
  # Fixed seed so that every run selects the same subset.
  random.seed(111)
  picked = sorted(random.sample(names, 100))
  with open('ImageNet-100.txt', 'w') as dst:
    dst.writelines('{:}\n'.format(name) for name in picked)
  print('-'*100)
 if __name__ == "__main__":
  # Build the ImageNet-100 subset: for every class listed in
  # 'ImageNet-100.txt', create a symbolic link under
  # ~/.torch/ILSVRC2012-100/{train,val} that points at the matching class
  # directory of the full dataset in ~/.torch/ILSVRC2012.
  #sample_100_cls()
  IN1K_root = Path.home() / '.torch' / 'ILSVRC2012'
  IN100_root = Path.home() / '.torch' / 'ILSVRC2012-100'
  assert IN1K_root.exists(), 'ImageNet directory does not exist : {:}'.format(IN1K_root)
  print ('Create soft link from ImageNet directory into : {:}'.format(IN100_root))
  with open('ImageNet-100.txt', 'r') as f:
    # Drop blank lines: an empty class name would resolve to the split
    # directory itself and link the whole split into the subset.
    classes = [x.strip() for x in f if x.strip()]
  splits = ['train', 'val']
  for sub in splits:
    (IN100_root / sub).mkdir(parents=True, exist_ok=True)
  for cls in classes:
    for sub in splits:
      xdir = IN1K_root / sub / cls
      assert xdir.exists(), '{:} does not exist'.format(xdir)
      link = IN100_root / sub / cls
      # Skip links left by a previous run. Re-running `ln -s` on an existing
      # directory link would follow it and create a self-referencing link
      # inside the source class directory.
      if link.is_symlink() or link.exists(): continue
      # Create the link directly instead of through a shell, so paths that
      # contain spaces or shell metacharacters are handled correctly.
      link.symlink_to(xdir, target_is_directory=True)
--- a/others/GDAS/data/tiny-imagenet.py
+++ b/others/GDAS/data/tiny-imagenet.py
@@ -0,0 +1,53 @@
 import os, sys
 from pathlib import Path
 # Download location of the Tiny-ImageNet-200 zip archive; main() passes it to wget.
 url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
 def load_val():
  """Read the Tiny-ImageNet validation annotation file.

  Parses 'tiny-imagenet-200/val/val_annotations.txt' (relative to the
  current working directory) as tab-separated text. The first field of each
  line is taken as the image file name and the second as its label; any
  further fields are ignored. Blank lines are skipped.

  Returns:
    (images, labels): two lists of strings of equal length, in file order.

  Raises:
    IndexError: if a non-blank line has fewer than two tab-separated fields.
  """
  path = 'tiny-imagenet-200/val/val_annotations.txt'
  images, labels = [], []
  # The context manager closes the file even if reading fails part-way.
  with open(path, 'r') as cfile:
    for line in cfile:
      line = line.strip()
      # Tolerate empty lines (e.g. a trailing newline at the end of the file).
      if not line: continue
      fields = line.split('\t')
      images.append(fields[0])
      labels.append(fields[1])
  return images, labels
 def main():
  """Download Tiny-ImageNet-200 and regroup its validation images by label.

  Steps, all relative to the current working directory:
    1. fetch the archive at `url` with wget,
    2. remove any existing 'tiny-imagenet-200' directory and unpack the archive,
    3. copy every validation image listed by load_val() into
       'tiny-imagenet-200/new_val/<label>/',
    4. replace 'tiny-imagenet-200/val' with that regrouped directory.

  Raises:
    RuntimeError: if unpacking the archive fails.
  """
  # Local import: only this function needs shutil.
  import shutil
  root = 'tiny-imagenet-200'
  # --continue resumes a partial download and does not fetch a second copy
  # (saved as '.zip.1' and never used) when the archive is already present.
  # The exit status is deliberately not checked, so a previously downloaded
  # archive can still be used without network access.
  os.system("wget --continue {:}".format(url))
  shutil.rmtree(root, ignore_errors=True)
  if os.system("unzip -o {:}.zip".format(root)) != 0:
    raise RuntimeError('failed to unzip {:}.zip'.format(root))
  images, labels = load_val()
  savedir = root + '/new_val'
  os.makedirs(savedir, exist_ok=True)
  for image, label in zip(images, labels):
    cdir = savedir + '/' + label
    os.makedirs(cdir, exist_ok=True)
    ori_path = root + '/val/images/' + image
    # Copy in-process instead of spawning one shell per image.
    shutil.copy(ori_path, cdir)
  # Swap the original flat validation folder for the per-label layout.
  shutil.rmtree(root + '/val')
  shutil.move(savedir, root + '/val')
 def generate_salt_pepper():
  """Write a salt-and-pepper-noised copy of the validation images.

  For every sub-directory of 'tiny-imagenet-200/val', each '*.JPEG' file is
  read with OpenCV, passed through imgaug's SaltAndPepper(p=0.2) augmenter,
  and written under the same sub-directory and file name inside
  'tiny-imagenet-200/val-noise'. Prints '<sub-directory> done' after each
  sub-directory.

  Raises:
    AssertionError: if 'tiny-imagenet-200/val' does not exist.
  """
  targetdir = Path('tiny-imagenet-200/val')
  noisedir  = Path('tiny-imagenet-200/val-noise')
  assert targetdir.exists(), '{:} does not exist'.format(targetdir)
  # The third-party packages are imported only after the directory check.
  from imgaug import augmenters as iaa
  import cv2
  noiser = iaa.SaltAndPepper(p=0.2)
  class_dirs = (entry for entry in targetdir.iterdir() if entry.is_dir())
  for class_dir in class_dirs:
    out_dir = noisedir / class_dir.name
    if not out_dir.exists():
      os.makedirs(str(out_dir))
    for src in class_dir.glob('*.JPEG'):
      noisy = noiser.augment_image(cv2.imread(str(src)))
      cv2.imwrite(str(out_dir / src.name), noisy)
    print ('{:} done'.format(class_dir))
 # Script entry point. The download/regrouping step (main) is commented out,
 # so running this file only generates the noisy validation copy; that step
 # asserts that 'tiny-imagenet-200/val' already exists.
 if __name__ == "__main__":
  #main()
  generate_salt_pepper()