update missing data of GDAS
This commit is contained in:
		
							
								
								
									
										
											BIN
										
									
								
								others/GDAS/data/GDAS.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								others/GDAS/data/GDAS.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								others/GDAS/data/GDAS.png
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								others/GDAS/data/GDAS.png
									
									
									
									
									
										Executable file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 514 KiB  | 
							
								
								
									
										49
									
								
								others/GDAS/data/Get-PTB-WT2.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								others/GDAS/data/Get-PTB-WT2.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
# https://github.com/salesforce/awd-lstm-lm
#
# Downloads WikiText-2, WikiText-103 and Penn Treebank (word and character
# level) into ./data/<dataset>/ and renames each split to train.txt,
# valid.txt and test.txt. Run it from the directory that should contain
# the `data` and `save` folders.
echo "=== Acquiring datasets ==="
echo "---"
mkdir -p save

mkdir -p data
# Every `cd` below aborts the script on failure: otherwise the following
# `mv`, `cd ..` and `rm -rf` commands would run in the wrong directory.
cd data || exit 1

echo "- Downloading WikiText-2 (WT2)"
wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
unzip -q wikitext-2-v1.zip
cd wikitext-2 || exit 1
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading WikiText-103 (WT103)"
wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
unzip -q wikitext-103-v1.zip
cd wikitext-103 || exit 1
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading Penn Treebank (PTB)"
wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
tar -xzf simple-examples.tgz

mkdir -p penn
cd penn || exit 1
mv ../simple-examples/data/ptb.train.txt train.txt
mv ../simple-examples/data/ptb.test.txt test.txt
mv ../simple-examples/data/ptb.valid.txt valid.txt
cd ..

# The character-level PTB files come from the same simple-examples archive.
echo "- Downloading Penn Treebank (Character)"
mkdir -p pennchar
cd pennchar || exit 1
mv ../simple-examples/data/ptb.char.train.txt train.txt
mv ../simple-examples/data/ptb.char.test.txt test.txt
mv ../simple-examples/data/ptb.char.valid.txt valid.txt
cd ..

# Only the renamed split files are kept; drop the rest of the extracted archive.
rm -rf simple-examples/

echo "---"
echo "Happy language modeling :)"
 | 
			
		||||
							
								
								
									
										100
									
								
								others/GDAS/data/ImageNet-100.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								others/GDAS/data/ImageNet-100.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,100 @@
 | 
			
		||||
n01532829
 | 
			
		||||
n01560419
 | 
			
		||||
n01580077
 | 
			
		||||
n01614925
 | 
			
		||||
n01664065
 | 
			
		||||
n01751748
 | 
			
		||||
n01871265
 | 
			
		||||
n01924916
 | 
			
		||||
n02087394
 | 
			
		||||
n02091134
 | 
			
		||||
n02091244
 | 
			
		||||
n02094433
 | 
			
		||||
n02097209
 | 
			
		||||
n02102040
 | 
			
		||||
n02102480
 | 
			
		||||
n02105251
 | 
			
		||||
n02106662
 | 
			
		||||
n02108422
 | 
			
		||||
n02108551
 | 
			
		||||
n02123597
 | 
			
		||||
n02165105
 | 
			
		||||
n02190166
 | 
			
		||||
n02268853
 | 
			
		||||
n02279972
 | 
			
		||||
n02408429
 | 
			
		||||
n02412080
 | 
			
		||||
n02443114
 | 
			
		||||
n02488702
 | 
			
		||||
n02509815
 | 
			
		||||
n02606052
 | 
			
		||||
n02701002
 | 
			
		||||
n02782093
 | 
			
		||||
n02794156
 | 
			
		||||
n02802426
 | 
			
		||||
n02804414
 | 
			
		||||
n02808440
 | 
			
		||||
n02906734
 | 
			
		||||
n02917067
 | 
			
		||||
n02950826
 | 
			
		||||
n02963159
 | 
			
		||||
n03017168
 | 
			
		||||
n03042490
 | 
			
		||||
n03045698
 | 
			
		||||
n03063689
 | 
			
		||||
n03065424
 | 
			
		||||
n03100240
 | 
			
		||||
n03109150
 | 
			
		||||
n03124170
 | 
			
		||||
n03131574
 | 
			
		||||
n03272562
 | 
			
		||||
n03345487
 | 
			
		||||
n03443371
 | 
			
		||||
n03461385
 | 
			
		||||
n03527444
 | 
			
		||||
n03690938
 | 
			
		||||
n03692522
 | 
			
		||||
n03721384
 | 
			
		||||
n03729826
 | 
			
		||||
n03792782
 | 
			
		||||
n03838899
 | 
			
		||||
n03843555
 | 
			
		||||
n03874293
 | 
			
		||||
n03877472
 | 
			
		||||
n03877845
 | 
			
		||||
n03908618
 | 
			
		||||
n03929660
 | 
			
		||||
n03930630
 | 
			
		||||
n03933933
 | 
			
		||||
n03970156
 | 
			
		||||
n03976657
 | 
			
		||||
n03982430
 | 
			
		||||
n04004767
 | 
			
		||||
n04065272
 | 
			
		||||
n04141975
 | 
			
		||||
n04146614
 | 
			
		||||
n04152593
 | 
			
		||||
n04192698
 | 
			
		||||
n04200800
 | 
			
		||||
n04204347
 | 
			
		||||
n04317175
 | 
			
		||||
n04326547
 | 
			
		||||
n04344873
 | 
			
		||||
n04370456
 | 
			
		||||
n04389033
 | 
			
		||||
n04501370
 | 
			
		||||
n04515003
 | 
			
		||||
n04542943
 | 
			
		||||
n04554684
 | 
			
		||||
n04562935
 | 
			
		||||
n04596742
 | 
			
		||||
n04597913
 | 
			
		||||
n04606251
 | 
			
		||||
n07583066
 | 
			
		||||
n07718472
 | 
			
		||||
n07734744
 | 
			
		||||
n07873807
 | 
			
		||||
n07880968
 | 
			
		||||
n09229709
 | 
			
		||||
n12768682
 | 
			
		||||
n12998815
 | 
			
		||||
							
								
								
									
										15
									
								
								others/GDAS/data/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								others/GDAS/data/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,15 @@
 | 
			
		||||
# ImageNet
 | 
			
		||||
 | 
			
		||||
The class names of ImageNet-1K are in `classes.txt`.
 | 
			
		||||
 | 
			
		||||
# A 100-class subset of ImageNet-1K : ImageNet-100
 | 
			
		||||
 | 
			
		||||
The class names of ImageNet-100 are in `ImageNet-100.txt`.
 | 
			
		||||
 | 
			
		||||
Running `python split-imagenet.py` will automatically create ImageNet-100 from the ImageNet-1K data. By default, the ImageNet-1K data is assumed to be located at `~/.torch/ILSVRC2012`. If your data is in a different location, you need to modify lines 19 and 20 of `split-imagenet.py`.
 | 
			
		||||
 | 
			
		||||
# Tiny-ImageNet
 | 
			
		||||
The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny ImageNet for training.
 | 
			
		||||
 | 
			
		||||
# PTB and WT2
 | 
			
		||||
Run `bash Get-PTB-WT2.sh` to download the data.
 | 
			
		||||
							
								
								
									
										1000
									
								
								others/GDAS/data/classes.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1000
									
								
								others/GDAS/data/classes.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										38
									
								
								others/GDAS/data/compress.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								others/GDAS/data/compress.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
			
		||||
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar
 | 
			
		||||
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip
 | 
			
		||||
import os, sys
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def command(prefix, cmd):
  """Print a shell command behind a progress prefix, then run it.

  Args:
    prefix: text printed directly in front of the command (may be empty).
    cmd: the shell command line passed to `os.system`.

  Returns:
    The status value returned by `os.system` (0 when the command succeeded).
    Callers are free to ignore it.
  """
  print ('{:}{:}'.format(prefix, cmd))
  status = os.system(cmd)
  if status != 0:
    # Report the failure instead of dropping it silently; execution continues
    # so that the remaining archives are still attempted.
    sys.stderr.write('[WARNING] command exited with status {:} : {:}\n'.format(status, cmd))
  return status
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(source, destination, xtype):
  """Pack an ILSVRC2012 directory into one archive per class plus one for val.

  Writes `destination/val.<xtype>` and `destination/train/<class>.<xtype>`
  for every `n*` entry found in `source/train`.

  Args:
    source: pathlib.Path of the dataset root; must contain `train` and `val`.
    destination: pathlib.Path of the output directory. An existing directory
      at this location is deleted and recreated empty.
    xtype: archive format, either 'tar' or 'zip'.

  Raises:
    AssertionError: if `source`, `source/train` or `source/val` is missing,
      or if `source/train` does not contain exactly 1000 `n*` entries.
    ValueError: if `xtype` is neither 'tar' nor 'zip'.
  """
  # Local imports keep this function self-contained with respect to the
  # file-level import list.
  import shlex
  import shutil

  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train').exists(), '{:}/train does not exist'.format(source)
  assert (source/'val'  ).exists(), '{:}/val   does not exist'.format(source)
  source      = source.resolve()
  destination = destination.resolve()

  # Validate all inputs BEFORE wiping `destination`, so that a bad call
  # cannot destroy a previous result.
  subdirs = list( (source / 'train').glob('n*') )
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  if xtype not in ('tar', 'zip'):
    raise ValueError('invalid compress type : {:}'.format(xtype))

  # Start from an empty destination. shutil.rmtree avoids building an
  # unquoted `rm -rf <path>` shell string.
  if destination.is_dir():
    shutil.rmtree(str(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)

  def quote(path):
    # Shell-escape a path; strings without special characters are unchanged.
    return shlex.quote(str(path))

  # Validation set: a single archive holding the whole `val` directory.
  if xtype == 'tar':
    command('', 'tar -cf {:} -C {:} val'.format(quote(destination/'val.tar'), quote(source)))
  else:
    command('', '(cd {:} ; zip -r {:} val)'.format(quote(source), quote(destination/'val.zip')))

  # Training set: one archive per class directory.
  for idx, subdir in enumerate(subdirs):
    name   = subdir.name
    prefix = '{:03d}/{:03d}-th: '.format(idx, len(subdirs))
    target = destination/'train'/'{:}.{:}'.format(name, xtype)
    if xtype == 'tar':
      command(prefix, 'tar -cf {:} -C {:} {:}'.format(quote(target), quote(source / 'train'), quote(name)))
    else:
      command(prefix, '(cd {:}; zip -r {:} {:})'.format(quote(source / 'train'), quote(target), quote(name)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Expected call: compress.py <source-dir> <destination-dir> <tar|zip>
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source      = Path(sys.argv[1])
  destination = Path(sys.argv[2])
  main(source, destination, sys.argv[3])
 | 
			
		||||
							
								
								
									
										3761
									
								
								others/GDAS/data/data/penn/test.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3761
									
								
								others/GDAS/data/data/penn/test.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										42068
									
								
								others/GDAS/data/data/penn/train.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42068
									
								
								others/GDAS/data/data/penn/train.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										3370
									
								
								others/GDAS/data/data/penn/valid.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3370
									
								
								others/GDAS/data/data/penn/valid.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										4358
									
								
								others/GDAS/data/data/wikitext-2/test.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4358
									
								
								others/GDAS/data/data/wikitext-2/test.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										36718
									
								
								others/GDAS/data/data/wikitext-2/train.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36718
									
								
								others/GDAS/data/data/wikitext-2/train.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										3760
									
								
								others/GDAS/data/data/wikitext-2/valid.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3760
									
								
								others/GDAS/data/data/wikitext-2/valid.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										94
									
								
								others/GDAS/data/decompress.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								others/GDAS/data/decompress.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,94 @@
 | 
			
		||||
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar
 | 
			
		||||
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip
 | 
			
		||||
import os, gc, sys
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import multiprocessing
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def execute(cmds, idx, num):
  """Run the share of `cmds` that belongs to worker `idx` out of `num`.

  A command at position i is printed and passed to `os.system` only when
  `i % num == idx`; all other commands are skipped.

  Args:
    cmds: sequence of shell command strings.
    idx: index of this worker.
    num: total number of workers used to split `cmds`.
  """
  for position, shell_cmd in enumerate(cmds):
    if position % num != idx:
      continue  # this command belongs to another worker
    print ('{:03d} :: {:03d} :: {:03d}/{:03d} : {:}'.format(idx, num, position, len(cmds), shell_cmd))
    os.system(shell_cmd)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def command(prefix, cmd):
  """Wrap `cmd` in a sub-shell snippet and return it as a string.

  The returned snippet echoes `prefix`, a timestamp and the shell PID, then
  runs `cmd`, then sleeps for 0.1s. Nothing is executed here; the caller
  decides what to do with the string.

  Args:
    prefix: label echoed in front of the timestamp.
    cmd: the shell command to embed.

  Returns:
    The composed shell command string.
  """
  template = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'
  return template.format(prefix, cmd)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def mkILSVRC2012(destination):
  """Create an empty output directory that contains a `train` sub-directory.

  An existing directory at `destination` is deleted together with all of its
  contents before it is recreated.

  Args:
    destination: pathlib.Path of the directory to (re)create.
  """
  # Local import keeps this function self-contained with respect to the
  # file-level import list.
  import shutil

  destination = destination.resolve()
  # shutil.rmtree replaces the former unquoted `rm -rf <path>` shell string,
  # which did not clear paths containing spaces or shell metacharacters.
  if destination.is_dir():
    shutil.rmtree(str(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(source, destination, xtype):
  """Print the shell commands that unpack an archived ILSVRC2012 tree.

  One command is generated per `n*` archive found in `source/train`
  (extracted into `destination/train`) and one for the validation archive
  (extracted into `destination`). The commands are only printed, not
  executed: run the printed output in a shell, or pass the commands to
  `execute`, to perform the extraction.

  Args:
    source: pathlib.Path holding `train/` and `val.tar` or `val.zip`.
    destination: pathlib.Path of the output directory; it is deleted and
      recreated (with an empty `train` sub-directory) by `mkILSVRC2012`.
    xtype: archive format, either 'tar' or 'zip'.

  Raises:
    AssertionError: if `source`, `source/train` or the validation archive is
      missing, or if `source/train` does not contain exactly 1000 `n*`
      entries.
    ValueError: if `xtype` is neither 'tar' nor 'zip'.
  """
  # Local import keeps this function self-contained with respect to the
  # file-level import list.
  import shlex

  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source)
  if xtype == 'tar'  : assert (source/'val.tar').exists(), '{:}/val.tar does not exist'.format(source)
  elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val.zip does not exist'.format(source)
  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
  source      = source.resolve()

  # Check the archive count BEFORE the destination is wiped, so that a bad
  # source cannot destroy an existing extraction.
  subdirs = list( (source / 'train').glob('n*') )
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )

  mkILSVRC2012(destination)

  def extract(archive, target):
    # Build the extraction command for one archive; xtype was validated
    # above, so anything that is not 'tar' is 'zip'.
    archive, target = shlex.quote(str(archive)), shlex.quote(str(target))
    if xtype == 'tar':
      return 'tar -xf {:} -C {:}'.format(archive, target)
    return 'unzip -qd {:} {:}'.format(target, archive)

  all_commands = []
  for idx, subdir in enumerate(subdirs):
    prefix = '{:03d}/{:03d}-th: '.format(idx, len(subdirs))
    all_commands.append( command(prefix, extract(subdir, destination / 'train')) )
  all_commands.append( command('', extract(source / 'val.{:}'.format(xtype), destination)) )

  for cmd in all_commands:
    print(cmd)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Expected call: decompress.py <source> <destination> <tar|zip|wget>
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  mode = sys.argv[3]
  if mode != 'wget':
    main(source, destination, mode)
  else:
    # In 'wget' mode `source` is read as a text file with one name per line.
    with open(source) as f:
      content = [line.strip() for line in f.readlines()]
    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
    mkILSVRC2012(destination)
    train_dir = destination / 'train'
    val_tar   = destination / 'val.tar'
    # First the validation archive: download, extract, delete the archive.
    all_commands = [
      command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, val_tar, destination, val_tar)),
    ]
    # Then one download/extract/delete command per listed name.
    for idx, name in enumerate(content):
      prefix   = '{:03d}/{:03d}-th: '.format(idx, len(content))
      tar_stem = train_dir / name
      fetch    = 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, train_dir, tar_stem, train_dir, tar_stem)
      all_commands.append( command(prefix, fetch) )
    # The commands are only printed here, not executed.
    for cmd in all_commands:
      print(cmd)
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								others/GDAS/data/imagenet-results.png
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								others/GDAS/data/imagenet-results.png
									
									
									
									
									
										Executable file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 139 KiB  | 
							
								
								
									
										15
									
								
								others/GDAS/data/load_data_CUHK-PEDES.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										15
									
								
								others/GDAS/data/load_data_CUHK-PEDES.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,15 @@
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
def main():
  """Load `caption_all.json` from the working directory and print statistics.

  Prints the number of entries, then the minimum, maximum and count of the
  distinct `id` values. Every entry must carry at least one caption.

  Raises:
    AssertionError: if an entry has an empty `captions` field.
  """
  with open('caption_all.json', 'r') as handle:
    entries = json.load(handle)
  print ('There are {:} images'.format( len(entries) ))

  unique_ids = set()
  for position, entry in enumerate( entries ):
    unique_ids.add( entry['id'] )
    captions = entry['captions']
    assert len( captions ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(position, captions, len(captions))

  summary = 'IDs :: min={:}, max={:}, num={:}'.format(min(unique_ids), max(unique_ids), len(unique_ids))
  print (summary)


if __name__ == '__main__':
  main()
 | 
			
		||||
							
								
								
									
										10831
									
								
								others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10831
									
								
								others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										15895
									
								
								others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15895
									
								
								others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										661
									
								
								others/GDAS/data/ps_mem.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										661
									
								
								others/GDAS/data/ps_mem.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,661 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
# Try to determine how much RAM is currently being used per program.
 | 
			
		||||
# Note per _program_, not per process. So for example this script
 | 
			
		||||
# will report RAM used by all httpd process together. In detail it reports:
 | 
			
		||||
# sum(private RAM for program processes) + sum(Shared RAM for program processes)
 | 
			
		||||
# The shared RAM is problematic to calculate, and this script automatically
 | 
			
		||||
# selects the most accurate method available for your kernel.
 | 
			
		||||
 | 
			
		||||
# Licence: LGPLv2
 | 
			
		||||
# Author:  P@draigBrady.com
 | 
			
		||||
# Source:  http://www.pixelbeat.org/scripts/ps_mem.py
 | 
			
		||||
 | 
			
		||||
# V1.0      06 Jul 2005     Initial release
 | 
			
		||||
# V1.1      11 Aug 2006     root permission required for accuracy
 | 
			
		||||
# V1.2      08 Nov 2006     Add total to output
 | 
			
		||||
#                           Use KiB,MiB,... for units rather than K,M,...
 | 
			
		||||
# V1.3      22 Nov 2006     Ignore shared col from /proc/$pid/statm for
 | 
			
		||||
#                           2.6 kernels up to and including 2.6.9.
 | 
			
		||||
#                           There it represented the total file backed extent
 | 
			
		||||
# V1.4      23 Nov 2006     Remove total from output as it's meaningless
 | 
			
		||||
#                           (the shared values overlap with other programs).
 | 
			
		||||
#                           Display the shared column. This extra info is
 | 
			
		||||
#                           useful, especially as it overlaps between programs.
 | 
			
		||||
# V1.5      26 Mar 2007     Remove redundant recursion from human()
 | 
			
		||||
# V1.6      05 Jun 2007     Also report number of processes with a given name.
 | 
			
		||||
#                           Patch from riccardo.murri@gmail.com
 | 
			
		||||
# V1.7      20 Sep 2007     Use PSS from /proc/$pid/smaps if available, which
 | 
			
		||||
#                           fixes some over-estimation and allows totalling.
 | 
			
		||||
#                           Enumerate the PIDs directly rather than using ps,
 | 
			
		||||
#                           which fixes the possible race between reading
 | 
			
		||||
#                           RSS with ps, and shared memory with this program.
 | 
			
		||||
#                           Also we can show non truncated command names.
 | 
			
		||||
# V1.8      28 Sep 2007     More accurate matching for stats in /proc/$pid/smaps
 | 
			
		||||
#                           as otherwise could match libraries causing a crash.
 | 
			
		||||
#                           Patch from patrice.bouchand.fedora@gmail.com
 | 
			
		||||
# V1.9      20 Feb 2008     Fix invalid values reported when PSS is available.
 | 
			
		||||
#                           Reported by Andrey Borzenkov <arvidjaar@mail.ru>
 | 
			
		||||
# V3.13     17 Sep 2018
 | 
			
		||||
#   http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py
 | 
			
		||||
 | 
			
		||||
# Notes:
 | 
			
		||||
#
 | 
			
		||||
# All interpreted programs where the interpreter is started
 | 
			
		||||
# by the shell or with env, will be merged to the interpreter
 | 
			
		||||
# (as that's what's given to exec). For e.g. all python programs
 | 
			
		||||
# starting with "#!/usr/bin/env python" will be grouped under python.
 | 
			
		||||
# You can change this by using the full command line but that will
 | 
			
		||||
# have the undesirable affect of splitting up programs started with
 | 
			
		||||
# differing parameters (for e.g. mingetty tty[1-6]).
 | 
			
		||||
#
 | 
			
		||||
# For 2.6 kernels up to and including 2.6.13 and later 2.4 redhat kernels
 | 
			
		||||
# (rmap vm without smaps) it can not be accurately determined how many pages
 | 
			
		||||
# are shared between processes in general or within a program in our case:
 | 
			
		||||
# http://lkml.org/lkml/2005/7/6/250
 | 
			
		||||
# A warning is printed if overestimation is possible.
 | 
			
		||||
# In addition for 2.6 kernels up to 2.6.9 inclusive, the shared
 | 
			
		||||
# value in /proc/$pid/statm is the total file-backed extent of a process.
 | 
			
		||||
# We ignore that, introducing more overestimation, again printing a warning.
 | 
			
		||||
# Since kernel 2.6.23-rc8-mm1 PSS is available in smaps, which allows
 | 
			
		||||
# us to calculate a more accurate value for the total RAM used by programs.
 | 
			
		||||
#
 | 
			
		||||
# Programs that use CLONE_VM without CLONE_THREAD are discounted by assuming
 | 
			
		||||
# they're the only programs that have the same /proc/$PID/smaps file for
 | 
			
		||||
# each instance.  This will fail if there are multiple real instances of a
 | 
			
		||||
# program that then use CLONE_VM without CLONE_THREAD, or if a clone changes
 | 
			
		||||
# its memory map while we're checksumming each /proc/$PID/smaps.
 | 
			
		||||
#
 | 
			
		||||
# I don't take account of memory allocated for a program
 | 
			
		||||
# by other programs. For e.g. memory used in the X server for
 | 
			
		||||
# a program could be determined, but is not.
 | 
			
		||||
#
 | 
			
		||||
# FreeBSD is supported if linprocfs is mounted at /compat/linux/proc/
 | 
			
		||||
# FreeBSD 8.0 supports up to a level of Linux 2.6.16
 | 
			
		||||
 | 
			
		||||
import getopt
 | 
			
		||||
import time
 | 
			
		||||
import errno
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# The following exits cleanly on Ctrl-C or EPIPE
 | 
			
		||||
# while treating other exceptions as before.
 | 
			
		||||
def std_exceptions(etype, value, tb):
    """Exception hook that stays silent on Ctrl-C and on a broken pipe.

    The default hook is restored first; any exception other than
    KeyboardInterrupt or an IOError with errno EPIPE is then passed on to
    the default hook unchanged.
    """
    sys.excepthook = sys.__excepthook__
    if issubclass(etype, KeyboardInterrupt):
        return
    if issubclass(etype, IOError) and value.errno == errno.EPIPE:
        return
    sys.__excepthook__(etype, value, tb)

# Install the hook for the whole program.
sys.excepthook = std_exceptions
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
#   Define some global variables
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# System page size converted from bytes to KiB.
PAGESIZE = os.sysconf("SC_PAGE_SIZE") / 1024 #KiB
# PID of this script's own process.
our_pid = os.getpid()

# Capability flags, both initialised to 0 here.
# NOTE(review): presumably updated elsewhere once PSS / swap-PSS support is
# detected -- not visible in this part of the file, confirm.
have_pss, have_swap_pss = 0, 0
 | 
			
		||||
 | 
			
		||||
class Unbuffered(object):
    """Stream wrapper that flushes the wrapped stream after every write."""

    def __init__(self, stream):
        # The wrapped stream; all calls are forwarded to it.
        self.stream = stream

    def write(self, data):
        """Write `data` to the wrapped stream and flush immediately."""
        wrapped = self.stream
        wrapped.write(data)
        wrapped.flush()

    def close(self):
        """Close the wrapped stream."""
        self.stream.close()

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()
 | 
			
		||||
 | 
			
		||||
class Proc:
    """Build paths into, and open files from, the proc filesystem."""

    def __init__(self):
        # On FreeBSD the Linux-compatible procfs is expected under
        # /compat/linux/proc; everywhere else plain /proc is used.
        on_freebsd = os.uname()[0] == "FreeBSD"
        self.proc = '/compat/linux/proc' if on_freebsd else '/proc'

    def path(self, *args):
        """Return the procfs root joined with the stringified args."""
        components = [str(part) for part in args]
        return os.path.join(self.proc, *components)

    def open(self, *args):
        """Open the procfs file addressed by args for reading.

        Raises LookupError when the open fails with ENOENT, EPERM or
        EACCES; any other IOError/OSError is re-raised unchanged.
        """
        try:
            if sys.version_info >= (3,):
                # Undecodable bytes are dropped instead of raising.
                return open(self.path(*args), errors='ignore')
            return open(self.path(*args))
        except (IOError, OSError):
            failure = sys.exc_info()[1]
            # ENOENT means a kernel thread or a process that is gone.
            if failure.errno in (errno.ENOENT, errno.EPERM, errno.EACCES):
                raise LookupError
            raise

proc = Proc()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
#   Functions
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
def parse_options():
    """Parse sys.argv[1:] and return the ps_mem settings.

    Returns the tuple
    (split_args, pids_to_show, watch, only_total, discriminate_by_pid,
    show_swap).  pids_to_show is a list of ints or None, watch is an int
    number of seconds or None, the rest are booleans.

    Exits with status 3 after writing the usage text (or the extraneous
    arguments) to stderr when the command line is invalid, and with
    status 0 after handling --help or --version.
    """
    long_options = [
        'split-args',
        'help',
        'version',
        'total',
        'discriminate-by-pid',
        'swap'
    ]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "shtdSp:w:", long_options)
    except getopt.GetoptError:
        sys.stderr.write(help())
        sys.exit(3)

    if args:
        sys.stderr.write("Extraneous arguments: %s\n" % args)
        sys.exit(3)

    # ps_mem.py options
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False
    show_swap = False
    watch = None
    only_total = False

    for o, a in opts:
        if o in ('-s', '--split-args'):
            split_args = True
        elif o in ('-t', '--total'):
            only_total = True
        elif o in ('-d', '--discriminate-by-pid'):
            discriminate_by_pid = True
        elif o in ('-S', '--swap'):
            show_swap = True
        elif o in ('-h', '--help'):
            sys.stdout.write(help())
            sys.exit(0)
        elif o == '--version':
            # Compared with ==: the old "o in ('--version')" was a substring
            # test against a plain string, not a tuple membership test.
            sys.stdout.write('3.13'+'\n')
            sys.exit(0)
        elif o == '-p':
            try:
                pids_to_show = [int(x) for x in a.split(',')]
            except ValueError:
                # Only a malformed number is a usage error.
                sys.stderr.write(help())
                sys.exit(3)
        elif o == '-w':
            try:
                watch = int(a)
            except ValueError:
                sys.stderr.write(help())
                sys.exit(3)

    return (
        split_args,
        pids_to_show,
        watch,
        only_total,
        discriminate_by_pid,
        show_swap
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def help():
    """Return the usage text shown for -h/--help and on invalid options."""
    # The long help option is spelled "--help" (as accepted by getopt in
    # parse_options); the text used to advertise a non-existent "-help".
    help_msg = (
        'Usage: ps_mem [OPTION]...\n'
        'Show program core memory usage\n'
        '\n'
        '  -h, --help                  Show this help\n'
        '  -p <pid>[,pid2,...pidN]     Only show memory usage PIDs in the '
        'specified list\n'
        '  -s, --split-args            Show and separate by, all command line'
        ' arguments\n'
        '  -t, --total                 Show only the total value\n'
        '  -d, --discriminate-by-pid   Show by process rather than by program\n'
        '  -S, --swap                  Show swap information\n'
        '  -w <N>                      Measure and show process memory every'
        ' N seconds\n'
    )

    return help_msg
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# (major,minor,release)
 | 
			
		||||
def kernel_ver():
    """Return the kernel version as a (major, minor, release) tuple of ints.

    The version is read from sys/kernel/osrelease below the procfs root.
    Any suffix starting at '-' or '_' is dropped from each component, a
    component that is not a number becomes 0, and missing components are
    filled with 0, so three ints are always returned.

    Errors from proc.open() (LookupError, IOError/OSError) propagate.
    """
    release_file = proc.open('sys/kernel/osrelease')
    try:
        fields = release_file.readline().strip().split(".")[:3]
    finally:
        release_file.close()

    version = []
    for field in fields:
        # Keep only the text before the first '-' or '_'.
        for char in "-_":
            field = field.split(char)[0]
        try:
            version.append(int(field))
        except ValueError:
            version.append(0)

    # Pad short release strings such as "4.4" up to three components.
    version.extend([0] * (3 - len(version)))
    return tuple(version)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#return Private,Shared,Swap(Pss),unique_id
 | 
			
		||||
#Note shared is always a subset of rss (trs is not always)
 | 
			
		||||
def getMemStats(pid):
    """Return (Private, Shared, Swap, mem_id) for the process pid.

    Sizes are in KiB.  When smaps (or the faster smaps_rollup) exists the
    figures come from it and mem_id is a hash of its content; otherwise
    they are derived from statm and mem_id is the pid itself.

    Sets the module flags have_pss / have_swap_pss once "Pss:" /
    "SwapPss:" lines have been seen.  Errors from proc.open() propagate.
    """
    global have_pss
    global have_swap_pss
    mem_id = pid #unique

    statm_file = proc.open(pid, 'statm')
    try:
        statm = statm_file.readline().split()
    finally:
        statm_file.close()
    Rss = int(statm[1]) * PAGESIZE

    Swap = 0

    if os.path.exists(proc.path(pid, 'smaps')):  # stat
        smaps = 'smaps'
        if os.path.exists(proc.path(pid, 'smaps_rollup')):
            smaps = 'smaps_rollup' # faster to process
        smaps_file = proc.open(pid, smaps)  # open
        try:
            lines = smaps_file.readlines()
        finally:
            smaps_file.close()
        # Note we checksum smaps as maps is usually but
        # not always different for separate processes.
        mem_id = hash(''.join(lines))

        pss_adjust = 0.5 # add 0.5KiB as this avg error due to truncation
        Shared = 0
        Private = 0
        Pss = 0
        swap_total = 0
        swap_pss_total = 0
        for line in lines:
            if line.startswith("Shared"):
                Shared += int(line.split()[1])
            elif line.startswith("Private"):
                Private += int(line.split()[1])
            elif line.startswith("Pss:"):
                # Match "Pss:" exactly: a bare "Pss" prefix also matches the
                # Pss_Anon/Pss_File/Pss_Shmem breakdown lines that newer
                # kernels add, which would count the same memory twice.
                have_pss = 1
                Pss += float(line.split()[1]) + pss_adjust
            elif line.startswith("Swap:"):
                swap_total += int(line.split()[1])
            elif line.startswith("SwapPss:"):
                have_swap_pss = 1
                swap_pss_total += int(line.split()[1])
        #Note Shared + Private = Rss above
        #The Rss in smaps includes video card mem etc.
        if have_pss:
            Shared = Pss - Private
        if have_swap_pss:
            # The kernel supports SwapPss, that shows proportional swap share.
            # Note that Swap - SwapPss is not Private Swap.
            Swap = swap_pss_total
        else:
            # Note that Swap = Private swap + Shared swap.
            Swap = swap_total
    elif (2,6,1) <= kernel_ver() <= (2,6,9):
        Shared = 0 #lots of overestimation, but what can we do?
        Private = Rss
    else:
        # Third statm field is the shared page count.
        Shared = int(statm[2]) * PAGESIZE
        Private = Rss - Shared
    return (Private, Shared, Swap, mem_id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def getCmdName(pid, split_args, discriminate_by_pid, exe_only=False):
    """Return the name under which process pid is reported.

    pid                 -- process id to look up
    split_args          -- return the whole command line instead of a name
    discriminate_by_pid -- append " [pid]" to the returned name
    exe_only            -- return just the basename of the executable

    Raises LookupError when the process cannot be inspected (it is gone,
    is a kernel thread, or permission is denied).
    """
    cmdline_file = proc.open(pid, 'cmdline')
    try:
        cmdline = cmdline_file.read().split("\0")
    finally:
        cmdline_file.close()
    if cmdline[-1] == '' and len(cmdline) > 1:
        cmdline = cmdline[:-1]

    path = proc.path(pid, 'exe')
    try:
        path = os.readlink(path)
        # Some symlink targets were seen to contain NULs on RHEL 5 at least
        # https://github.com/pixelb/scripts/pull/10, so take string up to NUL
        path = path.split('\0')[0]
    except OSError:
        val = sys.exc_info()[1]
        # ENOENT: either kernel thread or process gone
        if val.errno in (errno.ENOENT, errno.EPERM, errno.EACCES):
            raise LookupError
        raise

    if split_args:
        return ' '.join(cmdline).replace('\n', ' ')
    if path.endswith(" (deleted)"):
        path = path[:-10]
        if os.path.exists(path):
            path += " [updated]"
        else:
            #The path could have prelink stuff so try cmdline
            #which might have the full path present. This helped for:
            #/usr/libexec/notification-area-applet.#prelink#.fX7LCT (deleted)
            if os.path.exists(cmdline[0]):
                path = cmdline[0] + " [updated]"
            else:
                path += " [deleted]"
    exe = os.path.basename(path)
    if exe_only: return exe

    status_file = proc.open(pid, 'status')
    try:
        proc_status = status_file.readlines()
    finally:
        status_file.close()
    cmd = proc_status[0][6:-1]
    if exe.startswith(cmd):
        cmd = exe #show non truncated version
        #Note because we show the non truncated name
        #one can have separated programs as follows:
        #584.0 KiB +   1.0 MiB =   1.6 MiB    mozilla-thunder (exe -> bash)
        # 56.0 MiB +  22.2 MiB =  78.2 MiB    mozilla-thunderbird-bin
    else:
        #Lookup the parent's exe and use that if matching
        #which will merge "Web Content" with "firefox" for example
        ppid = 0
        # Only the first ten status lines are searched; slicing avoids an
        # IndexError when the file has fewer lines.
        for ps_line in proc_status[:10]:
            if ps_line.startswith('PPid:'):
                ppid = int(ps_line[6:-1])
                break
        if ppid:
            try:
                p_exe = getCmdName(ppid, False, False, exe_only=True)
            except LookupError:
                # An unreadable parent only prevents the merge; it must not
                # make this (readable) process vanish from the report.
                p_exe = None
            if exe == p_exe:
                cmd = exe
    if sys.version_info >= (3,):
        cmd = cmd.encode(errors='replace').decode()
    if discriminate_by_pid:
        cmd = '%s [%d]' % (cmd, pid)
    return cmd
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#The following matches "du -h" output
 | 
			
		||||
#see also human.py
 | 
			
		||||
def human(num, power="Ki", units=None):
    """Format a size for display.

    num   -- the size, expressed in the unit named by power (KiB by default)
    power -- binary prefix of num: "Ki", "Mi", "Gi", ...
    units -- when given, num is treated as KiB and returned as a plain
             number of `units`-byte blocks, with no suffix

    Without units the value is scaled by 1024 until it is below 1000 and
    returned as e.g. "1.5 MiB".  Scaling stops at the largest known prefix
    instead of failing on very large values.
    """
    if units is not None:
        return "%.f" % ((num * 1024) / units)

    powers = ["Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
    while num >= 1000: #4 digits
        next_index = powers.index(power) + 1
        if next_index >= len(powers):
            break  # already at the largest prefix; show the big number
        num /= 1024.0
        power = powers[next_index]
    return "%.1f %sB" % (num, power)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cmd_with_count(cmd, count):
    """Return cmd, followed by " (count)" when count is greater than one."""
    if count <= 1:
        return cmd
    return "%s (%u)" % (cmd, count)
 | 
			
		||||
 | 
			
		||||
#Warn of possible inaccuracies
 | 
			
		||||
#RAM:
 | 
			
		||||
#2 = accurate & can total
 | 
			
		||||
#1 = accurate only considering each process in isolation
 | 
			
		||||
#0 = some shared mem not reported
 | 
			
		||||
#-1= all shared mem not reported
 | 
			
		||||
#SWAP:
 | 
			
		||||
#2 = accurate & can total
 | 
			
		||||
#1 = accurate only considering each process in isolation
 | 
			
		||||
#-1= not available
 | 
			
		||||
def val_accuracy(show_swap):
    """Rate how trustworthy the reported RAM and swap values are.

    Returns (ram_accuracy, swap_accuracy).  RAM codes: 2 accurate and
    totalable, 1 accurate per process only, 0 some shared memory not
    reported, -1 no shared memory reported.  Swap codes: 2, 1 as for RAM,
    -1 not available.

    Background: http://wiki.apache.org/spamassassin/TopSharedMemoryBug
    """
    version = kernel_ver()
    self_pid = os.getpid()
    swap_accuracy = -1
    series = version[:2]

    if series == (2, 4):
        if "Inact_" in proc.open('meminfo').read():
            return 0, swap_accuracy
        return 1, swap_accuracy

    if series == (2, 6):
        if os.path.exists(proc.path(self_pid, 'smaps')):
            swap_accuracy = 1
            if "Pss:" in proc.open(self_pid, 'smaps').read():
                return 2, swap_accuracy
            return 1, swap_accuracy
        if (2, 6, 1) <= version <= (2, 6, 9):
            return -1, swap_accuracy
        return 0, swap_accuracy

    if version[0] > 2 and os.path.exists(proc.path(self_pid, 'smaps')):
        swap_accuracy = 1
        # smaps is only read when swap output was requested.
        if show_swap and "SwapPss:" in proc.open(self_pid, 'smaps').read():
            swap_accuracy = 2
        return 2, swap_accuracy

    return 1, swap_accuracy
 | 
			
		||||
 | 
			
		||||
def show_val_accuracy( ram_inacc, swap_inacc, only_total, show_swap ):
    """Warn on stderr about inaccurate values, then close stderr.

    ram_inacc / swap_inacc are the codes produced by val_accuracy().  With
    only_total set the messages are labelled "Error" rather than "Warning",
    and the process exits with status 1 unless the value being totalled
    (swap when show_swap is set, RAM otherwise) has accuracy 2.
    """
    level = ("Warning","Error")[only_total]

    # Only show significant warnings
    if not show_swap:
        swap_inacc = 2
    elif only_total:
        ram_inacc = 2

    ram_notes = {
        -1: (
            "%s: Shared memory is not reported by this system.\n" % level,
            "Values reported will be too large, and totals are not reported\n",
        ),
        0: (
            "%s: Shared memory is not reported accurately by this system.\n" % level,
            "Values reported could be too large, and totals are not reported\n",
        ),
        1: (
            "%s: Shared memory is slightly over-estimated by this system\n"
            "for each program, so totals are not reported.\n" % level,
        ),
    }
    swap_notes = {
        -1: (
            "%s: Swap is not reported by this system.\n" % level,
        ),
        1: (
            "%s: Swap is over-estimated by this system for each program,\n"
            "so totals are not reported.\n" % level,
        ),
    }

    for note in ram_notes.get(ram_inacc, ()):
        sys.stderr.write(note)
    for note in swap_notes.get(swap_inacc, ()):
        sys.stderr.write(note)

    sys.stderr.close()
    if only_total:
        accuracy = swap_inacc if show_swap else ram_inacc
        if accuracy != 2:
            sys.exit(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                     include_self=False, only_self=False):
    """Collect the memory usage of all visible processes, grouped by name.

    pids_to_show        -- collection of pids to restrict to, or None for all
    split_args          -- passed to getCmdName(): group by full command line
    discriminate_by_pid -- passed to getCmdName(): one entry per process
    include_self        -- also report this script's own process
    only_self           -- report nothing but this script's own process

    Returns (sorted_cmds, shareds, count, total, swaps, total_swap):
    sorted_cmds is a list of (name, ram) pairs in ascending ram order with
    zero entries dropped; shareds, count and swaps are dicts keyed by name;
    total and total_swap are the sums over all names.

    Processes that cannot be inspected, or that disappear while being
    inspected, are skipped.
    """
    cmds = {}
    shareds = {}
    mem_ids = {}
    count = {}
    swaps = {}
    for pid in os.listdir(proc.path('')):
        if not pid.isdigit():
            continue
        pid = int(pid)

        # Some filters
        if only_self and pid != our_pid:
            continue
        if pid == our_pid and not include_self:
            continue
        if pids_to_show is not None and pid not in pids_to_show:
            continue

        try:
            cmd = getCmdName(pid, split_args, discriminate_by_pid)
        except LookupError:
            #operation not permitted
            #kernel threads don't have exe links or
            #process gone
            continue

        try:
            private, shared, swap, mem_id = getMemStats(pid)
        except (LookupError, RuntimeError):
            # proc.open() reports a vanished or unreadable process with
            # LookupError, so that is what a race with process exit raises.
            continue #process gone

        if shareds.get(cmd):
            if have_pss: #add shared portion of PSS together
                shareds[cmd] += shared
            elif shareds[cmd] < shared: #just take largest shared val
                shareds[cmd] = shared
        else:
            shareds[cmd] = shared
        cmds[cmd] = cmds.get(cmd, 0) + private
        count[cmd] = count.get(cmd, 0) + 1
        mem_ids.setdefault(cmd, {})[mem_id] = None

        # Swap (overcounting for now...)
        swaps[cmd] = swaps.get(cmd, 0) + swap

    # Total swapped mem for each program
    total_swap = 0

    # Add shared mem for each program
    total = 0

    for cmd in cmds:
        cmd_count = count[cmd]
        if len(mem_ids[cmd]) == 1 and cmd_count > 1:
            # Assume this program is using CLONE_VM without CLONE_THREAD
            # so only account for one of the processes
            cmds[cmd] /= cmd_count
            if have_pss:
                shareds[cmd] /= cmd_count
        cmds[cmd] = cmds[cmd] + shareds[cmd]
        total += cmds[cmd]  # valid if PSS available
        total_swap += swaps[cmd]

    sorted_cmds = sorted(cmds.items(), key=lambda x:x[1])
    sorted_cmds = [x for x in sorted_cmds if x[1]]

    return sorted_cmds, shareds, count, total, swaps, total_swap
 | 
			
		||||
 | 
			
		||||
def print_header(show_swap, discriminate_by_pid):
    """Write the column header, followed by a blank line, to stdout.

    A swap column is added when show_swap is set, and the program column
    is titled "Program[pid]" when discriminate_by_pid is set.
    """
    pieces = [" Private  +   Shared  =  RAM used"]
    if show_swap:
        pieces.append("   Swap used")
    pieces.append("\tProgram")
    if discriminate_by_pid:
        pieces.append("[pid]")
    pieces.append("\n\n")
    sys.stdout.write("".join(pieces))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def print_memory_usage(sorted_cmds, shareds, count, total, swaps, total_swap,
                       show_swap):
    """Write one line per program, then a totals footer when it is valid.

    The arguments are the values returned by get_memory_usage().  Each
    line shows private + shared = RAM used, an optional swap column, and
    the program name with its process count.
    """
    for name, ram in sorted_cmds:
        shared = shareds[name]
        row_format = "%9s + %9s = %9s"
        fields = [human(ram - shared), human(shared), human(ram)]
        if show_swap:
            row_format += "   %9s"
            fields.append(human(swaps[name]))
        row_format += "\t%s\n"
        fields.append(cmd_with_count(name, count[name]))

        sys.stdout.write(row_format % tuple(fields))

    # Only show totals if appropriate
    if have_swap_pss and show_swap:  # kernel will have_pss
        footer = "%s\n%s%9s%s%9s\n%s\n" % (
            "-" * 45, " " * 24, human(total), " " * 3,
            human(total_swap), "=" * 45)
        sys.stdout.write(footer)
    elif have_pss:
        footer = "%s\n%s%9s\n%s\n" % (
            "-" * 33, " " * 24, human(total), "=" * 33)
        sys.stdout.write(footer)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def verify_environment(pids_to_show):
    """Exit early when the script cannot do its job.

    Exits with status 1 when not running as root and no pids were given
    with -p, and with status 2 when the kernel version cannot be read
    from the proc filesystem.  Other I/O errors propagate.
    """
    if os.geteuid() != 0 and not pids_to_show:
        sys.stderr.write("Sorry, root permission required, or specify pids with -p\n")
        sys.stderr.close()
        sys.exit(1)

    no_procfs_msg = (
        "Couldn't access " + proc.path('') + "\n"
        "Only GNU/Linux and FreeBSD (with linprocfs) are supported\n")
    try:
        kernel_ver()
    except LookupError:
        # proc.open() converts ENOENT/EPERM/EACCES into LookupError, so a
        # missing procfs shows up here rather than as an IOError.
        sys.stderr.write(no_procfs_msg)
        sys.exit(2)
    except (IOError, OSError):
        val = sys.exc_info()[1]
        if val.errno == errno.ENOENT:
            sys.stderr.write(no_procfs_msg)
            sys.exit(2)
        else:
            raise
 | 
			
		||||
 | 
			
		||||
def main():
    """Entry point: parse options, report memory usage, then print warnings.

    With -w the report is repeated every `watch` seconds until no process
    is left to report or the user interrupts with Ctrl-C.
    """
    # Force the stdout and stderr streams to be unbuffered
    sys.stdout = Unbuffered(sys.stdout)
    sys.stderr = Unbuffered(sys.stderr)

    (split_args, pids_to_show, watch, only_total,
     discriminate_by_pid, show_swap) = parse_options()

    verify_environment(pids_to_show)

    if not only_total:
        print_header(show_swap, discriminate_by_pid)

    def report_once():
        # Take one measurement, print it, and hand back the per-program
        # list so the watch loop can tell when nothing is left.
        sorted_cmds, shareds, count, total, swaps, total_swap = \
            get_memory_usage(pids_to_show, split_args, discriminate_by_pid)
        if only_total:
            # A bare total is only printed when it can be trusted.
            if show_swap and have_swap_pss:
                sys.stdout.write(human(total_swap, units=1)+'\n')
            elif not show_swap and have_pss:
                sys.stdout.write(human(total, units=1)+'\n')
        else:
            print_memory_usage(sorted_cmds, shareds, count, total, swaps,
                               total_swap, show_swap)
        return sorted_cmds

    if watch is None:
        # This is the default behavior
        report_once()
    else:
        try:
            remaining = True
            while remaining:
                remaining = report_once()
                sys.stdout.flush()
                time.sleep(watch)
            sys.stdout.write('Process does not exist anymore.\n')
        except KeyboardInterrupt:
            pass

    # We must close explicitly, so that any EPIPE exception
    # is handled by our excepthook, rather than the default
    # one which is reenabled after this script finishes.
    sys.stdout.close()

    ram_accuracy, swap_accuracy = val_accuracy( show_swap )
    show_val_accuracy( ram_accuracy, swap_accuracy, only_total, show_swap )


if __name__ == '__main__':
    main()
 | 
			
		||||
							
								
								
									
										35
									
								
								others/GDAS/data/show-queue.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								others/GDAS/data/show-queue.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
#!/bin/bash
# Print the job queues of each cluster group with `showjob -p <queue>`.

# show_group TITLE QUEUE...
# Print a banner containing TITLE, then run showjob for every QUEUE,
# pausing 0.3s after each call.
show_group() {
  local title="$1"
  shift
  echo '-------------------------------'
  echo "${title}"
  echo '-------------------------------'
  local queue
  for queue in "$@"
  do
    showjob -p "${queue}"
    sleep 0.3s
  done
}

# Show High-priority
show_group 'Queue in high-priority clusters' \
  yq01-v100-box-1-8 yq01-v100-box-idl-2-8

# The earlier, commented-out list also contained yq01-p40-2-8.
show_group 'Queue in low-priority clusters' \
  yq01-p40-3-8 yq01-p40-box-1-8 yq01-v100-box-2-8

show_group 'Queue for other IDL teams' \
  yq01-v100-box-idl-8 yq01-v100-box-idl-3-8
 | 
			
		||||
							
								
								
									
										37
									
								
								others/GDAS/data/split-imagenet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								others/GDAS/data/split-imagenet.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,37 @@
 | 
			
		||||
import os, sys, random
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sample_100_cls():
  """Pick 100 class names from classes.txt and save them, sorted.

  Reads one class name per line from ./classes.txt, draws 100 of them
  with the random seed fixed to 111, and writes the sorted selection to
  ./ImageNet-100.txt, one name per line.
  """
  with open('classes.txt') as source:
    names = [line.strip() for line in source]
  random.seed(111)
  chosen = sorted(random.sample(names, 100))
  with open('ImageNet-100.txt', 'w') as target:
    target.writelines('{:}\n'.format(name) for name in chosen)
  print('-'*100)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
  # Build an ImageNet-100 directory whose class folders are symlinks into
  # the full ImageNet tree, for the classes listed in ./ImageNet-100.txt.
  # sample_100_cls() regenerates that list; it is left disabled here.
  #sample_100_cls()
  IN1K_root = Path.home() / '.torch' / 'ILSVRC2012'
  IN100_root = Path.home() / '.torch' / 'ILSVRC2012-100'
  assert IN1K_root.exists(), 'ImageNet directory does not exist : {:}'.format(IN1K_root)
  print ('Create soft link from ImageNet directory into : {:}'.format(IN100_root))
  with open('ImageNet-100.txt', 'r') as f:
    # Blank lines are dropped: an empty class name would resolve to the
    # whole train/val directory.
    classes = [x.strip() for x in f if x.strip()]
  splits = ['train', 'val']
  for sub in splits:
    (IN100_root / sub).mkdir(parents=True, exist_ok=True)

  for cls in classes:
    for sub in splits:
      xdir = IN1K_root / sub / cls
      assert xdir.exists(), '{:} does not exist'.format(xdir)
      link = IN100_root / sub / cls
      # Skip links left by an earlier run. Re-running `ln -s` on an
      # existing directory link would create a nested link inside the
      # original dataset instead.
      if link.is_symlink() or link.exists():
        continue
      # Created directly rather than through a shell, so paths containing
      # spaces or shell metacharacters are handled correctly.
      link.symlink_to(xdir, target_is_directory=True)
 | 
			
		||||
							
								
								
									
										53
									
								
								others/GDAS/data/tiny-imagenet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								others/GDAS/data/tiny-imagenet.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,53 @@
 | 
			
		||||
import os, sys
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
 | 
			
		||||
 | 
			
		||||
def load_val():
  """Read the Tiny-ImageNet validation annotations.

  Parses tiny-imagenet-200/val/val_annotations.txt (relative to the
  current directory), a tab-separated file whose first two columns are
  used as image file name and class label.  Blank lines are ignored.

  Returns (images, labels): two lists of equal length.
  """
  path = 'tiny-imagenet-200/val/val_annotations.txt'
  images, labels = [], []
  # The with-block closes the file even when a line fails to parse.
  with open(path, 'r') as cfile:
    for line in cfile:
      line = line.strip()
      if not line:
        continue  # e.g. a trailing empty line
      fields = line.split('\t')
      images.append(fields[0])
      labels.append(fields[1])
  return images, labels
 | 
			
		||||
 | 
			
		||||
def main():
  """Download Tiny-ImageNet and regroup its validation images by class.

  Fetches and unpacks the archive named by `url` into the current
  directory, then replaces tiny-imagenet-200/val with a copy in which
  every image sits in a sub-directory named after its label.

  Raises subprocess.CalledProcessError when wget or unzip fails, so a
  failed download is reported instead of being processed further.
  """
  import shutil
  import subprocess

  # Download before deleting anything, so a failure keeps the old data.
  subprocess.check_call(['wget', url])
  shutil.rmtree('tiny-imagenet-200', ignore_errors=True)
  subprocess.check_call(['unzip', '-o', 'tiny-imagenet-200.zip'])
  images, labels = load_val()
  savedir = 'tiny-imagenet-200/new_val'
  if not os.path.exists(savedir): os.makedirs(savedir)
  for image, label in zip(images, labels):
    cdir = savedir + '/' + label
    if not os.path.exists(cdir): os.makedirs(cdir)
    ori_path = 'tiny-imagenet-200/val/images/' + image
    # Copied in-process: no shell is spawned per image.
    shutil.copy(ori_path, cdir)
  shutil.rmtree('tiny-imagenet-200/val', ignore_errors=True)
  shutil.move(savedir, 'tiny-imagenet-200/val')
 | 
			
		||||
 | 
			
		||||
def generate_salt_pepper():
  """Write a noisy copy of every validation image.

  For each class directory in tiny-imagenet-200/val, every *.JPEG file is
  read with cv2, passed through imgaug's SaltAndPepper(p=0.2) augmenter
  and saved under the same name in tiny-imagenet-200/val-noise/<class>.
  """
  clean_root = Path('tiny-imagenet-200/val')
  noisy_root = Path('tiny-imagenet-200/val-noise')
  assert clean_root.exists(), '{:} does not exist'.format(clean_root)
  # Imported here so the rest of the script works without these packages.
  from imgaug import augmenters as iaa
  import cv2
  noiser = iaa.SaltAndPepper(p=0.2)

  for class_dir in clean_root.iterdir():
    if class_dir.is_dir():
      out_dir = noisy_root / class_dir.name
      if not out_dir.exists(): os.makedirs('{:}'.format(out_dir))
      for source in class_dir.glob('*.JPEG'):
        noisy = noiser.augment_image(cv2.imread(str(source)))
        cv2.imwrite(str(out_dir / source.name), noisy)
      print ('{:} done'.format(class_dir))

if __name__ == "__main__":
  # main() (download and regroup the validation split) is left disabled;
  # only the noisy copy is generated.
  #main()
  generate_salt_pepper()
 | 
			
		||||
		Reference in New Issue
	
	Block a user