update missing data of GDAS
This commit is contained in: parent 0f46f63a25, commit 180702ab8e
BIN   others/GDAS/data/GDAS.pdf   (new file; binary file not shown)
BIN   others/GDAS/data/GDAS.png   (new executable file; binary file not shown; size: 514 KiB)
49    others/GDAS/data/Get-PTB-WT2.sh   (new file)
@@ -0,0 +1,49 @@
# https://github.com/salesforce/awd-lstm-lm
echo "=== Acquiring datasets ==="
echo "---"
mkdir -p save

mkdir -p data
cd data

echo "- Downloading WikiText-2 (WT2)"
wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
unzip -q wikitext-2-v1.zip
cd wikitext-2
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading WikiText-103 (WT103)"
wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
unzip -q wikitext-103-v1.zip
cd wikitext-103
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading Penn Treebank (PTB)"
wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
tar -xzf simple-examples.tgz

mkdir -p penn
cd penn
mv ../simple-examples/data/ptb.train.txt train.txt
mv ../simple-examples/data/ptb.test.txt test.txt
mv ../simple-examples/data/ptb.valid.txt valid.txt
cd ..

echo "- Downloading Penn Treebank (Character)"
mkdir -p pennchar
cd pennchar
mv ../simple-examples/data/ptb.char.train.txt train.txt
mv ../simple-examples/data/ptb.char.test.txt test.txt
mv ../simple-examples/data/ptb.char.valid.txt valid.txt
cd ..

rm -rf simple-examples/

echo "---"
echo "Happy language modeling :)"
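For reference, this is the layout the script is expected to leave behind (a sketch inferred from the mkdir/mv commands above, not part of the committed file):

data/
  penn/          train.txt  valid.txt  test.txt   (word-level PTB)
  pennchar/      train.txt  valid.txt  test.txt   (character-level PTB)
  wikitext-2/    train.txt  valid.txt  test.txt
  wikitext-103/  train.txt  valid.txt  test.txt
save/                                             (empty directory created by the script)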
100   others/GDAS/data/ImageNet-100.txt   (new file)
@@ -0,0 +1,100 @@
n01532829
n01560419
n01580077
n01614925
n01664065
n01751748
n01871265
n01924916
n02087394
n02091134
n02091244
n02094433
n02097209
n02102040
n02102480
n02105251
n02106662
n02108422
n02108551
n02123597
n02165105
n02190166
n02268853
n02279972
n02408429
n02412080
n02443114
n02488702
n02509815
n02606052
n02701002
n02782093
n02794156
n02802426
n02804414
n02808440
n02906734
n02917067
n02950826
n02963159
n03017168
n03042490
n03045698
n03063689
n03065424
n03100240
n03109150
n03124170
n03131574
n03272562
n03345487
n03443371
n03461385
n03527444
n03690938
n03692522
n03721384
n03729826
n03792782
n03838899
n03843555
n03874293
n03877472
n03877845
n03908618
n03929660
n03930630
n03933933
n03970156
n03976657
n03982430
n04004767
n04065272
n04141975
n04146614
n04152593
n04192698
n04200800
n04204347
n04317175
n04326547
n04344873
n04370456
n04389033
n04501370
n04515003
n04542943
n04554684
n04562935
n04596742
n04597913
n04606251
n07583066
n07718472
n07734744
n07873807
n07880968
n09229709
n12768682
n12998815
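These 100 WordNet ids are consumed by `split-imagenet.py` further down in this commit; a quick sanity check of the list (an illustrative sketch, not part of the commit) could be:

import re
with open('ImageNet-100.txt') as f:
  classes = [x.strip() for x in f if x.strip()]
# expect exactly 100 unique ids of the form nXXXXXXXX
assert len(classes) == 100 and len(set(classes)) == 100
assert all(re.fullmatch(r'n\d{8}', cls) for cls in classes)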
15    others/GDAS/data/README.md   (new file)
@@ -0,0 +1,15 @@
# ImageNet

The class names of ImageNet-1K are in `classes.txt`.

# A 100-class subset of ImageNet-1K : ImageNet-100

The class names of ImageNet-100 are in `ImageNet-100.txt`.

Running `python split-imagenet.py` will automatically create ImageNet-100 based on the data of ImageNet-1K. By default, we assume the data of ImageNet-1K is located at `~/.torch/ILSVRC2012`. If your data is in a different location, you need to modify lines 19 and 20 of `split-imagenet.py`.

# Tiny-ImageNet
The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny-ImageNet for training.

# PTB and WT2
Run `bash Get-PTB-WT2.sh` to download the data.
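For convenience, the two assignments that the note above refers to (lines 19-20 of `split-imagenet.py`, shown later in this commit) are the source and target roots; pointing them at a custom location would look like this (the paths below are placeholders, not the defaults):

IN1K_root  = Path('/path/to/ILSVRC2012')       # where the full ImageNet-1K train/ and val/ folders live
IN100_root = Path('/path/to/ILSVRC2012-100')   # where the ImageNet-100 symlink tree will be created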
1000  others/GDAS/data/classes.txt   (new file; diff suppressed because it is too large)
38    others/GDAS/data/compress.py   (new file)
@@ -0,0 +1,38 @@
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip
import os, sys
from pathlib import Path


def command(prefix, cmd):
  print ('{:}{:}'.format(prefix, cmd))
  os.system(cmd)


def main(source, destination, xtype):
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train').exists(), '{:}/train does not exist'.format(source)
  assert (source/'val'  ).exists(), '{:}/val does not exist'.format(source)
  source = source.resolve()
  destination = destination.resolve()
  destination.mkdir(parents=True, exist_ok=True)
  os.system('rm -rf {:}'.format(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)

  subdirs = list( (source / 'train').glob('n*') )
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  if   xtype == 'tar': command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source))
  elif xtype == 'zip': command('', '(cd {:} ; zip -r {:} val)'.format(source, destination/'val.zip'))
  else: raise ValueError('invalid compress type : {:}'.format(xtype))
  for idx, subdir in enumerate(subdirs):
    name = subdir.name
    if   xtype == 'tar': command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name))
    elif xtype == 'zip': command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), '(cd {:}; zip -r {:} {:})'.format(source / 'train', destination/'train'/'{:}.zip'.format(name), name))
    else: raise ValueError('invalid compress type : {:}'.format(xtype))


if __name__ == '__main__':
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  main(source, destination, sys.argv[3])
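Following the usage in the header comments, the script emits one archive per class plus one validation archive; the destination is expected to end up as (a sketch, not captured output; the class id shown is only an example):

$TORCH_HOME/ILSVRC2012-TAR/
  val.tar
  train/
    n01440764.tar
    ...              (1000 archives, one per train/n* class folder)

The same layout with `.zip` files results from the `zip` mode.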
3761   others/GDAS/data/data/penn/test.txt          (new file; diff suppressed because it is too large)
42068  others/GDAS/data/data/penn/train.txt         (new file; diff suppressed because it is too large)
3370   others/GDAS/data/data/penn/valid.txt         (new file; diff suppressed because it is too large)
4358   others/GDAS/data/data/wikitext-2/test.txt    (new file; diff suppressed because it is too large)
36718  others/GDAS/data/data/wikitext-2/train.txt   (new file; diff suppressed because it is too large)
3760   others/GDAS/data/data/wikitext-2/valid.txt   (new file; diff suppressed because it is too large)
94    others/GDAS/data/decompress.py   (new file)
@@ -0,0 +1,94 @@
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip
import os, gc, sys
from pathlib import Path
import multiprocessing


def execute(cmds, idx, num):
  #print ('{:03d} :: {:03d} :: {:03d}'.format(idx, num, len(cmds)))
  for i, cmd in enumerate(cmds):
    if i % num == idx:
      print ('{:03d} :: {:03d} :: {:03d}/{:03d} : {:}'.format(idx, num, i, len(cmds), cmd))
      os.system(cmd)


def command(prefix, cmd):
  #print ('{:}{:}'.format(prefix, cmd))
  #if execute: os.system(cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
  xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  return xcmd


def mkILSVRC2012(destination):
  destination = destination.resolve()
  destination.mkdir(parents=True, exist_ok=True)
  os.system('rm -rf {:}'.format(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)


def main(source, destination, xtype):
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source)
  if   xtype == 'tar': assert (source/'val.tar').exists(), '{:}/val does not exist'.format(source)
  elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val does not exist'.format(source)
  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
  #assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
  source = source.resolve()
  mkILSVRC2012(destination)

  subdirs = list( (source / 'train').glob('n*') )
  all_commands = []
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  for idx, subdir in enumerate(subdirs):
    name = subdir.name
    if   xtype == 'tar': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
    elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
    else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
    all_commands.append( cmd )
  if   xtype == 'tar': cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
  elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
  else               : raise ValueError('invalid unzip type : {:}'.format(xtype))
  all_commands.append( cmd )
  #print ('Collect all commands done : {:} lines'.format( len(all_commands) ))

  for i, cmd in enumerate(all_commands):
    print(cmd)
    # os.system(cmd)
    # print ('{:03d}/{:03d} : {:}'.format(i, len(all_commands), cmd))
    # gc.collect()

  """
  records = []
  for i in range(num_process):
    process = multiprocessing.Process(target=execute, args=(all_commands, i, num_process))
    process.start()
    records.append(process)
  for process in records:
    process.join()
  """


if __name__ == '__main__':
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  #num_process = int(sys.argv[3])
  if sys.argv[3] == 'wget':
    with open(source) as f:
      content = f.readlines()
      content = [x.strip() for x in content]
    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
    mkILSVRC2012(destination)
    all_commands = []
    cmd = command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, destination / 'val.tar', destination, destination / 'val.tar'))
    all_commands.append(cmd)
    for idx, name in enumerate(content):
      cmd = command('{:03d}/{:03d}-th: '.format(idx, len(content)), 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, destination / 'train', destination / 'train' / name, destination / 'train', destination / 'train' / name))
      all_commands.append(cmd)
    for i, cmd in enumerate(all_commands): print(cmd)
  else:
    main(source, destination, sys.argv[3])
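Note that, as committed, `main` only prints the generated shell commands (the `os.system` call inside the loop is commented out), so one way to actually execute them is to pipe the output to a shell; this usage is an assumption about the intended workflow, not documented in the commit:

python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar | bash

The `wget` mode works the same way, printing one download-and-extract command per class read from the id list passed as the first argument.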
BIN   others/GDAS/data/imagenet-results.png   (new executable file; binary file not shown; size: 139 KiB)
15    others/GDAS/data/load_data_CUHK-PEDES.py   (new executable file)
@@ -0,0 +1,15 @@
import json

def main():
  xpath = 'caption_all.json'
  with open(xpath, 'r') as cfile:
    cap_data = json.load(cfile)
  print ('There are {:} images'.format( len(cap_data) ))
  IDs = set()
  for idx, data in enumerate( cap_data ):
    IDs.add( data['id'] )
    assert len( data['captions'] ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, data['captions'], len(data['captions']))
  print ('IDs :: min={:}, max={:}, num={:}'.format(min(IDs), max(IDs), len(IDs)))

if __name__ == '__main__':
  main()
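The script assumes `caption_all.json` (the annotation file distributed with CUHK-PEDES) is a list of records that each carry at least an `id` and a non-empty `captions` list; roughly the following shape (an illustrative sketch, other fields elided):

[
  {"id": 1, "captions": ["a pedestrian wearing a dark jacket ...", "..."]},
  {"id": 2, "captions": ["..."]}
]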
10831  others/GDAS/data/logs/GDAS-F1-cifar10-cut-seed-6844.txt   (new file; diff suppressed because it is too large)
15895  others/GDAS/data/logs/GDAS-V1-imagenet-seed-3993.txt      (new file; diff suppressed because it is too large)
661   others/GDAS/data/ps_mem.py   (new file)
@@ -0,0 +1,661 @@
#!/usr/bin/env python

# Try to determine how much RAM is currently being used per program.
# Note per _program_, not per process. So for example this script
# will report RAM used by all httpd process together. In detail it reports:
# sum(private RAM for program processes) + sum(Shared RAM for program processes)
# The shared RAM is problematic to calculate, and this script automatically
# selects the most accurate method available for your kernel.

# Licence: LGPLv2
# Author:  P@draigBrady.com
# Source:  http://www.pixelbeat.org/scripts/ps_mem.py

# V1.0    06 Jul 2005   Initial release
# V1.1    11 Aug 2006   root permission required for accuracy
# V1.2    08 Nov 2006   Add total to output
#                       Use KiB,MiB,... for units rather than K,M,...
# V1.3    22 Nov 2006   Ignore shared col from /proc/$pid/statm for
#                       2.6 kernels up to and including 2.6.9.
#                       There it represented the total file backed extent
# V1.4    23 Nov 2006   Remove total from output as it's meaningless
#                       (the shared values overlap with other programs).
#                       Display the shared column. This extra info is
#                       useful, especially as it overlaps between programs.
# V1.5    26 Mar 2007   Remove redundant recursion from human()
# V1.6    05 Jun 2007   Also report number of processes with a given name.
#                       Patch from riccardo.murri@gmail.com
# V1.7    20 Sep 2007   Use PSS from /proc/$pid/smaps if available, which
#                       fixes some over-estimation and allows totalling.
#                       Enumerate the PIDs directly rather than using ps,
#                       which fixes the possible race between reading
#                       RSS with ps, and shared memory with this program.
#                       Also we can show non truncated command names.
# V1.8    28 Sep 2007   More accurate matching for stats in /proc/$pid/smaps
#                       as otherwise could match libraries causing a crash.
#                       Patch from patrice.bouchand.fedora@gmail.com
# V1.9    20 Feb 2008   Fix invalid values reported when PSS is available.
#                       Reported by Andrey Borzenkov <arvidjaar@mail.ru>
# V3.13   17 Sep 2018
#   http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py

# Notes:
#
# All interpreted programs where the interpreter is started
# by the shell or with env, will be merged to the interpreter
# (as that's what's given to exec). For e.g. all python programs
# starting with "#!/usr/bin/env python" will be grouped under python.
# You can change this by using the full command line but that will
# have the undesirable affect of splitting up programs started with
# differing parameters (for e.g. mingetty tty[1-6]).
#
# For 2.6 kernels up to and including 2.6.13 and later 2.4 redhat kernels
# (rmap vm without smaps) it can not be accurately determined how many pages
# are shared between processes in general or within a program in our case:
# http://lkml.org/lkml/2005/7/6/250
# A warning is printed if overestimation is possible.
# In addition for 2.6 kernels up to 2.6.9 inclusive, the shared
# value in /proc/$pid/statm is the total file-backed extent of a process.
# We ignore that, introducing more overestimation, again printing a warning.
# Since kernel 2.6.23-rc8-mm1 PSS is available in smaps, which allows
# us to calculate a more accurate value for the total RAM used by programs.
#
# Programs that use CLONE_VM without CLONE_THREAD are discounted by assuming
# they're the only programs that have the same /proc/$PID/smaps file for
# each instance. This will fail if there are multiple real instances of a
# program that then use CLONE_VM without CLONE_THREAD, or if a clone changes
# its memory map while we're checksumming each /proc/$PID/smaps.
#
# I don't take account of memory allocated for a program
# by other programs. For e.g. memory used in the X server for
# a program could be determined, but is not.
#
# FreeBSD is supported if linprocfs is mounted at /compat/linux/proc/
# FreeBSD 8.0 supports up to a level of Linux 2.6.16

import getopt
import time
import errno
import os
import sys

# The following exits cleanly on Ctrl-C or EPIPE
# while treating other exceptions as before.
def std_exceptions(etype, value, tb):
    sys.excepthook = sys.__excepthook__
    if issubclass(etype, KeyboardInterrupt):
        pass
    elif issubclass(etype, IOError) and value.errno == errno.EPIPE:
        pass
    else:
        sys.__excepthook__(etype, value, tb)
sys.excepthook = std_exceptions

#
# Define some global variables
#

PAGESIZE = os.sysconf("SC_PAGE_SIZE") / 1024 #KiB
our_pid = os.getpid()

have_pss = 0
have_swap_pss = 0

class Unbuffered(object):
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def close(self):
        self.stream.close()
    def flush(self):
        self.stream.flush()

class Proc:
    def __init__(self):
        uname = os.uname()
        if uname[0] == "FreeBSD":
            self.proc = '/compat/linux/proc'
        else:
            self.proc = '/proc'

    def path(self, *args):
        return os.path.join(self.proc, *(str(a) for a in args))

    def open(self, *args):
        try:
            if sys.version_info < (3,):
                return open(self.path(*args))
            else:
                return open(self.path(*args), errors='ignore')
        except (IOError, OSError):
            val = sys.exc_info()[1]
            if (val.errno == errno.ENOENT or  # kernel thread or process gone
                val.errno == errno.EPERM or
                val.errno == errno.EACCES):
                raise LookupError
            raise

proc = Proc()


#
# Functions
#

def parse_options():
    try:
        long_options = [
            'split-args',
            'help',
            'version',
            'total',
            'discriminate-by-pid',
            'swap'
        ]
        opts, args = getopt.getopt(sys.argv[1:], "shtdSp:w:", long_options)
    except getopt.GetoptError:
        sys.stderr.write(help())
        sys.exit(3)

    if len(args):
        sys.stderr.write("Extraneous arguments: %s\n" % args)
        sys.exit(3)

    # ps_mem.py options
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False
    show_swap = False
    watch = None
    only_total = False

    for o, a in opts:
        if o in ('-s', '--split-args'):
            split_args = True
        if o in ('-t', '--total'):
            only_total = True
        if o in ('-d', '--discriminate-by-pid'):
            discriminate_by_pid = True
        if o in ('-S', '--swap'):
            show_swap = True
        if o in ('-h', '--help'):
            sys.stdout.write(help())
            sys.exit(0)
        if o in ('--version'):
            sys.stdout.write('3.13'+'\n')
            sys.exit(0)
        if o in ('-p',):
            try:
                pids_to_show = [int(x) for x in a.split(',')]
            except:
                sys.stderr.write(help())
                sys.exit(3)
        if o in ('-w',):
            try:
                watch = int(a)
            except:
                sys.stderr.write(help())
                sys.exit(3)

    return (
        split_args,
        pids_to_show,
        watch,
        only_total,
        discriminate_by_pid,
        show_swap
    )


def help():
    help_msg = 'Usage: ps_mem [OPTION]...\n' \
        'Show program core memory usage\n' \
        '\n' \
        '  -h, -help                   Show this help\n' \
        '  -p <pid>[,pid2,...pidN]     Only show memory usage PIDs in the ' \
        'specified list\n' \
        '  -s, --split-args            Show and separate by, all command line' \
        ' arguments\n' \
        '  -t, --total                 Show only the total value\n' \
        '  -d, --discriminate-by-pid   Show by process rather than by program\n' \
        '  -S, --swap                  Show swap information\n' \
        '  -w <N>                      Measure and show process memory every' \
        ' N seconds\n'

    return help_msg


# (major,minor,release)
def kernel_ver():
    kv = proc.open('sys/kernel/osrelease').readline().split(".")[:3]
    last = len(kv)
    if last == 2:
        kv.append('0')
    last -= 1
    while last > 0:
        for char in "-_":
            kv[last] = kv[last].split(char)[0]
        try:
            int(kv[last])
        except:
            kv[last] = 0
        last -= 1
    return (int(kv[0]), int(kv[1]), int(kv[2]))


#return Private,Shared,Swap(Pss),unique_id
#Note shared is always a subset of rss (trs is not always)
def getMemStats(pid):
    global have_pss
    global have_swap_pss
    mem_id = pid #unique
    Private_lines = []
    Shared_lines = []
    Pss_lines = []
    Rss = (int(proc.open(pid, 'statm').readline().split()[1])
           * PAGESIZE)
    Swap_lines = []
    Swap_pss_lines = []

    Swap = 0

    if os.path.exists(proc.path(pid, 'smaps')):  # stat
        smaps = 'smaps'
        if os.path.exists(proc.path(pid, 'smaps_rollup')):
            smaps = 'smaps_rollup'  # faster to process
        lines = proc.open(pid, smaps).readlines()  # open
        # Note we checksum smaps as maps is usually but
        # not always different for separate processes.
        mem_id = hash(''.join(lines))
        for line in lines:
            if line.startswith("Shared"):
                Shared_lines.append(line)
            elif line.startswith("Private"):
                Private_lines.append(line)
            elif line.startswith("Pss"):
                have_pss = 1
                Pss_lines.append(line)
            elif line.startswith("Swap:"):
                Swap_lines.append(line)
            elif line.startswith("SwapPss:"):
                have_swap_pss = 1
                Swap_pss_lines.append(line)
        Shared = sum([int(line.split()[1]) for line in Shared_lines])
        Private = sum([int(line.split()[1]) for line in Private_lines])
        #Note Shared + Private = Rss above
        #The Rss in smaps includes video card mem etc.
        if have_pss:
            pss_adjust = 0.5  # add 0.5KiB as this avg error due to truncation
            Pss = sum([float(line.split()[1])+pss_adjust for line in Pss_lines])
            Shared = Pss - Private
        if have_swap_pss:
            # The kernel supports SwapPss, that shows proportional swap share.
            # Note that Swap - SwapPss is not Private Swap.
            Swap = sum([int(line.split()[1]) for line in Swap_pss_lines])
        else:
            # Note that Swap = Private swap + Shared swap.
            Swap = sum([int(line.split()[1]) for line in Swap_lines])
    elif (2,6,1) <= kernel_ver() <= (2,6,9):
        Shared = 0  #lots of overestimation, but what can we do?
        Private = Rss
    else:
        Shared = int(proc.open(pid, 'statm').readline().split()[2])
        Shared *= PAGESIZE
        Private = Rss - Shared
    return (Private, Shared, Swap, mem_id)


def getCmdName(pid, split_args, discriminate_by_pid, exe_only=False):
    cmdline = proc.open(pid, 'cmdline').read().split("\0")
    if cmdline[-1] == '' and len(cmdline) > 1:
        cmdline = cmdline[:-1]

    path = proc.path(pid, 'exe')
    try:
        path = os.readlink(path)
        # Some symlink targets were seen to contain NULs on RHEL 5 at least
        # https://github.com/pixelb/scripts/pull/10, so take string up to NUL
        path = path.split('\0')[0]
    except OSError:
        val = sys.exc_info()[1]
        if (val.errno == errno.ENOENT or  # either kernel thread or process gone
            val.errno == errno.EPERM or
            val.errno == errno.EACCES):
            raise LookupError
        raise

    if split_args:
        return ' '.join(cmdline).replace('\n', ' ')
    if path.endswith(" (deleted)"):
        path = path[:-10]
        if os.path.exists(path):
            path += " [updated]"
        else:
            #The path could be have prelink stuff so try cmdline
            #which might have the full path present. This helped for:
            #/usr/libexec/notification-area-applet.#prelink#.fX7LCT (deleted)
            if os.path.exists(cmdline[0]):
                path = cmdline[0] + " [updated]"
            else:
                path += " [deleted]"
    exe = os.path.basename(path)
    if exe_only: return exe

    proc_status = proc.open(pid, 'status').readlines()
    cmd = proc_status[0][6:-1]
    if exe.startswith(cmd):
        cmd = exe  #show non truncated version
        #Note because we show the non truncated name
        #one can have separated programs as follows:
        #584.0 KiB + 1.0 MiB = 1.6 MiB    mozilla-thunder (exe -> bash)
        # 56.0 MiB + 22.2 MiB = 78.2 MiB  mozilla-thunderbird-bin
    else:
        #Lookup the parent's exe and use that if matching
        #which will merge "Web Content" with "firefox" for example
        ppid = 0
        for l in range(10):
            ps_line = proc_status[l]
            if ps_line.startswith('PPid:'):
                ppid = int(ps_line[6:-1])
                break
        if ppid:
            p_exe = getCmdName(ppid, False, False, exe_only=True)
            if exe == p_exe:
                cmd = exe
    if sys.version_info >= (3,):
        cmd = cmd.encode(errors='replace').decode()
    if discriminate_by_pid:
        cmd = '%s [%d]' % (cmd, pid)
    return cmd


#The following matches "du -h" output
#see also human.py
def human(num, power="Ki", units=None):
    if units is None:
        powers = ["Ki", "Mi", "Gi", "Ti"]
        while num >= 1000:  #4 digits
            num /= 1024.0
            power = powers[powers.index(power)+1]
        return "%.1f %sB" % (num, power)
    else:
        return "%.f" % ((num * 1024) / units)


def cmd_with_count(cmd, count):
    if count > 1:
        return "%s (%u)" % (cmd, count)
    else:
        return cmd

#Warn of possible inaccuracies
#RAM:
#2 = accurate & can total
#1 = accurate only considering each process in isolation
#0 = some shared mem not reported
#-1= all shared mem not reported
#SWAP:
#2 = accurate & can total
#1 = accurate only considering each process in isolation
#-1= not available
def val_accuracy(show_swap):
    """http://wiki.apache.org/spamassassin/TopSharedMemoryBug"""
    kv = kernel_ver()
    pid = os.getpid()
    swap_accuracy = -1
    if kv[:2] == (2,4):
        if proc.open('meminfo').read().find("Inact_") == -1:
            return 1, swap_accuracy
        return 0, swap_accuracy
    elif kv[:2] == (2,6):
        if os.path.exists(proc.path(pid, 'smaps')):
            swap_accuracy = 1
            if proc.open(pid, 'smaps').read().find("Pss:")!=-1:
                return 2, swap_accuracy
            else:
                return 1, swap_accuracy
        if (2,6,1) <= kv <= (2,6,9):
            return -1, swap_accuracy
        return 0, swap_accuracy
    elif kv[0] > 2 and os.path.exists(proc.path(pid, 'smaps')):
        swap_accuracy = 1
        if show_swap and proc.open(pid, 'smaps').read().find("SwapPss:")!=-1:
            swap_accuracy = 2
        return 2, swap_accuracy
    else:
        return 1, swap_accuracy

def show_val_accuracy( ram_inacc, swap_inacc, only_total, show_swap ):
    level = ("Warning","Error")[only_total]

    # Only show significant warnings
    if not show_swap:
        swap_inacc = 2
    elif only_total:
        ram_inacc = 2

    if ram_inacc == -1:
        sys.stderr.write(
            "%s: Shared memory is not reported by this system.\n" % level
        )
        sys.stderr.write(
            "Values reported will be too large, and totals are not reported\n"
        )
    elif ram_inacc == 0:
        sys.stderr.write(
            "%s: Shared memory is not reported accurately by this system.\n" % level
        )
        sys.stderr.write(
            "Values reported could be too large, and totals are not reported\n"
        )
    elif ram_inacc == 1:
        sys.stderr.write(
            "%s: Shared memory is slightly over-estimated by this system\n"
            "for each program, so totals are not reported.\n" % level
        )

    if swap_inacc == -1:
        sys.stderr.write(
            "%s: Swap is not reported by this system.\n" % level
        )
    elif swap_inacc == 1:
        sys.stderr.write(
            "%s: Swap is over-estimated by this system for each program,\n"
            "so totals are not reported.\n" % level
        )

    sys.stderr.close()
    if only_total:
        if show_swap:
            accuracy = swap_inacc
        else:
            accuracy = ram_inacc
        if accuracy != 2:
            sys.exit(1)


def get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                     include_self=False, only_self=False):
    cmds = {}
    shareds = {}
    mem_ids = {}
    count = {}
    swaps = {}
    for pid in os.listdir(proc.path('')):
        if not pid.isdigit():
            continue
        pid = int(pid)

        # Some filters
        if only_self and pid != our_pid:
            continue
        if pid == our_pid and not include_self:
            continue
        if pids_to_show is not None and pid not in pids_to_show:
            continue

        try:
            cmd = getCmdName(pid, split_args, discriminate_by_pid)
        except LookupError:
            #operation not permitted
            #kernel threads don't have exe links or
            #process gone
            continue

        try:
            private, shared, swap, mem_id = getMemStats(pid)
        except RuntimeError:
            continue  #process gone
        if shareds.get(cmd):
            if have_pss:  #add shared portion of PSS together
                shareds[cmd] += shared
            elif shareds[cmd] < shared:  #just take largest shared val
                shareds[cmd] = shared
        else:
            shareds[cmd] = shared
        cmds[cmd] = cmds.setdefault(cmd, 0) + private
        if cmd in count:
            count[cmd] += 1
        else:
            count[cmd] = 1
        mem_ids.setdefault(cmd, {}).update({mem_id: None})

        # Swap (overcounting for now...)
        swaps[cmd] = swaps.setdefault(cmd, 0) + swap

    # Total swaped mem for each program
    total_swap = 0

    # Add shared mem for each program
    total = 0

    for cmd in cmds:
        cmd_count = count[cmd]
        if len(mem_ids[cmd]) == 1 and cmd_count > 1:
            # Assume this program is using CLONE_VM without CLONE_THREAD
            # so only account for one of the processes
            cmds[cmd] /= cmd_count
            if have_pss:
                shareds[cmd] /= cmd_count
        cmds[cmd] = cmds[cmd] + shareds[cmd]
        total += cmds[cmd]  # valid if PSS available
        total_swap += swaps[cmd]

    sorted_cmds = sorted(cmds.items(), key=lambda x:x[1])
    sorted_cmds = [x for x in sorted_cmds if x[1]]

    return sorted_cmds, shareds, count, total, swaps, total_swap

def print_header(show_swap, discriminate_by_pid):
    output_string = " Private + Shared = RAM used"
    if show_swap:
        output_string += " Swap used"
    output_string += "\tProgram"
    if discriminate_by_pid:
        output_string += "[pid]"
    output_string += "\n\n"
    sys.stdout.write(output_string)


def print_memory_usage(sorted_cmds, shareds, count, total, swaps, total_swap,
                       show_swap):
    for cmd in sorted_cmds:

        output_string = "%9s + %9s = %9s"
        output_data = (human(cmd[1]-shareds[cmd[0]]),
                       human(shareds[cmd[0]]), human(cmd[1]))
        if show_swap:
            output_string += " %9s"
            output_data += (human(swaps[cmd[0]]),)
        output_string += "\t%s\n"
        output_data += (cmd_with_count(cmd[0], count[cmd[0]]),)

        sys.stdout.write(output_string % output_data)

    # Only show totals if appropriate
    if have_swap_pss and show_swap:  # kernel will have_pss
        sys.stdout.write("%s\n%s%9s%s%9s\n%s\n" %
                         ("-" * 45, " " * 24, human(total), " " * 3,
                          human(total_swap), "=" * 45))
    elif have_pss:
        sys.stdout.write("%s\n%s%9s\n%s\n" %
                         ("-" * 33, " " * 24, human(total), "=" * 33))


def verify_environment(pids_to_show):
    if os.geteuid() != 0 and not pids_to_show:
        sys.stderr.write("Sorry, root permission required, or specify pids with -p\n")
        sys.stderr.close()
        sys.exit(1)

    try:
        kernel_ver()
    except (IOError, OSError):
        val = sys.exc_info()[1]
        if val.errno == errno.ENOENT:
            sys.stderr.write(
                "Couldn't access " + proc.path('') + "\n"
                "Only GNU/Linux and FreeBSD (with linprocfs) are supported\n")
            sys.exit(2)
        else:
            raise

def main():
    # Force the stdout and stderr streams to be unbuffered
    sys.stdout = Unbuffered(sys.stdout)
    sys.stderr = Unbuffered(sys.stderr)

    split_args, pids_to_show, watch, only_total, discriminate_by_pid, \
        show_swap = parse_options()

    verify_environment(pids_to_show)

    if not only_total:
        print_header(show_swap, discriminate_by_pid)

    if watch is not None:
        try:
            sorted_cmds = True
            while sorted_cmds:
                sorted_cmds, shareds, count, total, swaps, total_swap = \
                    get_memory_usage(pids_to_show, split_args,
                                     discriminate_by_pid)
                if only_total and show_swap and have_swap_pss:
                    sys.stdout.write(human(total_swap, units=1)+'\n')
                elif only_total and not show_swap and have_pss:
                    sys.stdout.write(human(total, units=1)+'\n')
                elif not only_total:
                    print_memory_usage(sorted_cmds, shareds, count, total,
                                       swaps, total_swap, show_swap)

                sys.stdout.flush()
                time.sleep(watch)
            else:
                sys.stdout.write('Process does not exist anymore.\n')
        except KeyboardInterrupt:
            pass
    else:
        # This is the default behavior
        sorted_cmds, shareds, count, total, swaps, total_swap = \
            get_memory_usage(pids_to_show, split_args,
                             discriminate_by_pid)
        if only_total and show_swap and have_swap_pss:
            sys.stdout.write(human(total_swap, units=1)+'\n')
        elif only_total and not show_swap and have_pss:
            sys.stdout.write(human(total, units=1)+'\n')
        elif not only_total:
            print_memory_usage(sorted_cmds, shareds, count, total, swaps,
                               total_swap, show_swap)

    # We must close explicitly, so that any EPIPE exception
    # is handled by our excepthook, rather than the default
    # one which is reenabled after this script finishes.
    sys.stdout.close()

    ram_accuracy, swap_accuracy = val_accuracy( show_swap )
    show_val_accuracy( ram_accuracy, swap_accuracy, only_total, show_swap )

if __name__ == '__main__':
    main()
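Typical invocations, following the `help()` text above (root is required unless specific PIDs are given via -p):

sudo python ps_mem.py            # per-program private + shared RAM
sudo python ps_mem.py -S         # also show swap usage
python ps_mem.py -p 1234,5678    # only the listed PIDs, no root needed
sudo python ps_mem.py -t -w 5    # print only the total, refreshed every 5 seconds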
35    others/GDAS/data/show-queue.sh   (new file)
@@ -0,0 +1,35 @@
#!/bin/bash
# Show High-priority
echo '-------------------------------'
echo 'Queue in high-priority clusters'
echo '-------------------------------'
queues="yq01-v100-box-1-8 yq01-v100-box-idl-2-8"
for queue in ${queues}
do
  showjob -p ${queue}
  sleep 0.3s
done

echo '-------------------------------'
echo 'Queue in low-priority clusters'
echo '-------------------------------'

#queues="yq01-p40-3-8 yq01-p40-2-8 yq01-p40-box-1-8 yq01-v100-box-2-8"
queues="yq01-p40-3-8 yq01-p40-box-1-8 yq01-v100-box-2-8"
for queue in ${queues}
do
  showjob -p ${queue}
  sleep 0.3s
done


echo '-------------------------------'
echo 'Queue for other IDL teams'
echo '-------------------------------'

queues="yq01-v100-box-idl-8 yq01-v100-box-idl-3-8"
for queue in ${queues}
do
  showjob -p ${queue}
  sleep 0.3s
done
37    others/GDAS/data/split-imagenet.py   (new file)
@@ -0,0 +1,37 @@
import os, sys, random
from pathlib import Path


def sample_100_cls():
  with open('classes.txt') as f:
    content = f.readlines()
    content = [x.strip() for x in content]
  random.seed(111)
  classes = random.sample(content, 100)
  classes.sort()
  with open('ImageNet-100.txt', 'w') as f:
    for cls in classes: f.write('{:}\n'.format(cls))
  print('-'*100)


if __name__ == "__main__":
  #sample_100_cls()
  IN1K_root = Path.home() / '.torch' / 'ILSVRC2012'
  IN100_root = Path.home() / '.torch' / 'ILSVRC2012-100'
  assert IN1K_root.exists(), 'ImageNet directory does not exist : {:}'.format(IN1K_root)
  print ('Create soft link from ImageNet directory into : {:}'.format(IN100_root))
  with open('ImageNet-100.txt', 'r') as f:
    classes = f.readlines()
    classes = [x.strip() for x in classes]
  for sub in ['train', 'val']:
    xdir = IN100_root / sub
    if not xdir.exists(): xdir.mkdir(parents=True, exist_ok=True)

  for idx, cls in enumerate(classes):
    xdir = IN1K_root / 'train' / cls
    assert xdir.exists(), '{:} does not exist'.format(xdir)
    os.system('ln -s {:} {:}'.format(xdir, IN100_root / 'train' / cls))

    xdir = IN1K_root / 'val' / cls
    assert xdir.exists(), '{:} does not exist'.format(xdir)
    os.system('ln -s {:} {:}'.format(xdir, IN100_root / 'val' / cls))
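As used here, the script is run from `others/GDAS/data/` (it opens `classes.txt` and `ImageNet-100.txt` by relative path) and only builds the symlink tree; uncommenting `sample_100_cls()` in the `__main__` block regenerates `ImageNet-100.txt` from `classes.txt` with the fixed seed 111. A typical run would simply be:

cd others/GDAS/data && python split-imagenet.py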
53    others/GDAS/data/tiny-imagenet.py   (new file)
@@ -0,0 +1,53 @@
import os, sys
from pathlib import Path

url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"

def load_val():
  path = 'tiny-imagenet-200/val/val_annotations.txt'
  cfile = open(path, 'r')
  content = cfile.readlines()
  content = [x.strip().split('\t') for x in content]
  cfile.close()
  images = [x[0] for x in content]
  labels = [x[1] for x in content]
  return images, labels

def main():
  os.system("wget {:}".format(url))
  os.system("rm -rf tiny-imagenet-200")
  os.system("unzip -o tiny-imagenet-200.zip")
  images, labels = load_val()
  savedir = 'tiny-imagenet-200/new_val'
  if not os.path.exists(savedir): os.makedirs(savedir)
  for image, label in zip(images, labels):
    cdir = savedir + '/' + label
    if not os.path.exists(cdir): os.makedirs(cdir)
    ori_path = 'tiny-imagenet-200/val/images/' + image
    os.system("cp {:} {:}".format(ori_path, cdir))
  os.system("rm -rf tiny-imagenet-200/val")
  os.system("mv {:} tiny-imagenet-200/val".format(savedir))

def generate_salt_pepper():
  targetdir = Path('tiny-imagenet-200/val')
  noisedir = Path('tiny-imagenet-200/val-noise')
  assert targetdir.exists(), '{:} does not exist'.format(targetdir)
  from imgaug import augmenters as iaa
  import cv2
  aug = iaa.SaltAndPepper(p=0.2)

  for sub in targetdir.iterdir():
    if not sub.is_dir(): continue
    subdir = noisedir / sub.name
    if not subdir.exists(): os.makedirs('{:}'.format(subdir))
    images = sub.glob('*.JPEG')
    for image in images:
      I = cv2.imread(str(image))
      Inoise = aug.augment_image(I)
      savepath = subdir / image.name
      cv2.imwrite(str(savepath), Inoise)
    print ('{:} done'.format(sub))

if __name__ == "__main__":
  #main()
  generate_salt_pepper()
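As committed, `main()` (the download and re-foldering of the validation split) is commented out and only the salt-and-pepper noisy copy of `val` is generated, which requires `imgaug` and OpenCV; a plausible setup, with unpinned versions since the commit specifies none, is:

pip install imgaug opencv-python
python tiny-imagenet.py    # re-enable main() in the __main__ block for the initial download and val re-foldering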