Update ImageNet compress/decompress scripts to support ZIP archives

This commit is contained in:
Xuanyi Dong 2019-04-02 14:58:25 +08:00
parent b836c17eb6
commit 3f483c37e7
4 changed files with 32 additions and 17 deletions

View File

@ -1,4 +1,5 @@
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar
# python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip
import os, sys
from pathlib import Path
@ -8,7 +9,7 @@ def command(prefix, cmd):
os.system(cmd)
def main(source, destination):
def main(source, destination, xtype):
assert source.exists(), '{:} does not exist'.format(source)
assert (source/'train').exists(), '{:}/train does not exist'.format(source)
assert (source/'val' ).exists(), '{:}/val does not exist'.format(source)
@ -21,13 +22,17 @@ def main(source, destination):
subdirs = list( (source / 'train').glob('n*') )
assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source))
if xtype == 'tar' : command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source))
elif xtype == 'zip': command('', '(cd {:} ; zip -r {:} val)'.format(source, destination/'val.zip'))
else: raise ValueError('invalid compress type : {:}'.format(xtype))
for idx, subdir in enumerate(subdirs):
name = subdir.name
command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name))
if xtype == 'tar' : command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name))
elif xtype == 'zip': command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), '(cd {:}; zip -r {:} {:})'.format(source / 'train', destination/'train'/'{:}.zip'.format(name), name))
else: raise ValueError('invalid compress type : {:}'.format(xtype))
if __name__ == '__main__':
assert len(sys.argv) == 3, 'invalid argv : {:}'.format(sys.argv)
assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
source, destination = Path(sys.argv[1]), Path(sys.argv[2])
main(source, destination)
main(source, destination, sys.argv[3])

View File

@ -1,4 +1,5 @@
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar
# python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip
import os, gc, sys
from pathlib import Path
import multiprocessing
@ -15,14 +16,17 @@ def execute(cmds, idx, num):
def command(prefix, cmd):
#print ('{:}{:}'.format(prefix, cmd))
#if execute: os.system(cmd)
return cmd
xcmd = '(echo {:}; {:}; sleep 0.1s)'.format(prefix, cmd)
return xcmd
def main(source, destination, num_process):
def main(source, destination, xtype):
assert source.exists(), '{:} does not exist'.format(source)
assert (source/'train' ).exists(), '{:}/train does not exist'.format(source)
assert (source/'val.tar').exists(), '{:}/val does not exist'.format(source)
assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
if xtype == 'tar' : assert (source/'val.tar').exists(), '{:}/val does not exist'.format(source)
elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val does not exist'.format(source)
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
#assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
source = source.resolve()
destination = destination.resolve()
destination.mkdir(parents=True, exist_ok=True)
@ -33,11 +37,15 @@ def main(source, destination, num_process):
subdirs = list( (source / 'train').glob('n*') )
all_commands = []
assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
if xtype == 'tar' : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
all_commands.append( cmd )
for idx, subdir in enumerate(subdirs):
name = subdir.name
cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
if xtype == 'tar' : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
all_commands.append( cmd )
#print ('Collect all commands done : {:} lines'.format( len(all_commands) ))
@ -61,5 +69,5 @@ def main(source, destination, num_process):
if __name__ == '__main__':
assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
source, destination = Path(sys.argv[1]), Path(sys.argv[2])
num_process = int(sys.argv[3])
main(source, destination, num_process)
#num_process = int(sys.argv[3])
main(source, destination, sys.argv[3])

View File

@ -22,7 +22,7 @@ TIME=$(date +"%Y-%h-%d--%T")
TIME="${TIME//:/-}"
JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh"
HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${TIME}"
HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${NAME}-${TIME}"
echo "JOB-SCRIPT: "${JOB_SCRIPT}
cat ${FDIR}/job-script.sh > ${JOB_SCRIPT}

View File

@ -27,7 +27,9 @@ else
echo "Unzip ILSVRC2012"
tar --version
#tar xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 5 | bash
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ./data/data/get_imagenet.sh
${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
bash ./data/data/get_imagenet.sh
echo "Unzip ILSVRC2012 done"
fi