update ZIP for imagenet
This commit is contained in:
		| @@ -1,4 +1,5 @@ | |||||||
| # python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR | # python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-TAR tar | ||||||
|  | # python ./data/compress.py $TORCH_HOME/ILSVRC2012/ $TORCH_HOME/ILSVRC2012-ZIP zip | ||||||
| import os, sys | import os, sys | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
|  |  | ||||||
| @@ -8,7 +9,7 @@ def command(prefix, cmd): | |||||||
|   os.system(cmd) |   os.system(cmd) | ||||||
|  |  | ||||||
|  |  | ||||||
| def main(source, destination): | def main(source, destination, xtype): | ||||||
|   assert source.exists(), '{:} does not exist'.format(source) |   assert source.exists(), '{:} does not exist'.format(source) | ||||||
|   assert (source/'train').exists(), '{:}/train does not exist'.format(source) |   assert (source/'train').exists(), '{:}/train does not exist'.format(source) | ||||||
|   assert (source/'val'  ).exists(), '{:}/val   does not exist'.format(source) |   assert (source/'val'  ).exists(), '{:}/val   does not exist'.format(source) | ||||||
| @@ -21,13 +22,17 @@ def main(source, destination): | |||||||
|  |  | ||||||
|   subdirs = list( (source / 'train').glob('n*') ) |   subdirs = list( (source / 'train').glob('n*') ) | ||||||
|   assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) ) |   assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) ) | ||||||
|   command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source)) |   if xtype == 'tar'  : command('', 'tar -cf {:} -C {:} val'.format(destination/'val.tar', source)) | ||||||
|  |   elif xtype == 'zip': command('', '(cd {:} ; zip -r {:} val)'.format(source, destination/'val.zip')) | ||||||
|  |   else: raise ValueError('invalid compress type : {:}'.format(xtype)) | ||||||
|   for idx, subdir in enumerate(subdirs): |   for idx, subdir in enumerate(subdirs): | ||||||
|     name = subdir.name |     name = subdir.name | ||||||
|     command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name)) |     if xtype == 'tar'  : command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -cf {:} -C {:} {:}'.format(destination/'train'/'{:}.tar'.format(name), source / 'train', name)) | ||||||
|  |     elif xtype == 'zip': command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), '(cd {:}; zip -r {:} {:})'.format(source / 'train', destination/'train'/'{:}.zip'.format(name), name)) | ||||||
|  |     else: raise ValueError('invalid compress type : {:}'.format(xtype)) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|   assert len(sys.argv) == 3, 'invalid argv : {:}'.format(sys.argv) |   assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv) | ||||||
|   source, destination = Path(sys.argv[1]), Path(sys.argv[2]) |   source, destination = Path(sys.argv[1]), Path(sys.argv[2]) | ||||||
|   main(source, destination) |   main(source, destination, sys.argv[3]) | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| # python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 | # python ./data/decompress.py $TORCH_HOME/ILSVRC2012-TAR/ ./data/data/ILSVRC2012 tar | ||||||
|  | # python ./data/decompress.py $TORCH_HOME/ILSVRC2012-ZIP/ ./data/data/ILSVRC2012 zip | ||||||
| import os, gc, sys | import os, gc, sys | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| import multiprocessing | import multiprocessing | ||||||
| @@ -15,14 +16,17 @@ def execute(cmds, idx, num): | |||||||
| def command(prefix, cmd): | def command(prefix, cmd): | ||||||
|   #print ('{:}{:}'.format(prefix, cmd)) |   #print ('{:}{:}'.format(prefix, cmd)) | ||||||
|   #if execute: os.system(cmd) |   #if execute: os.system(cmd) | ||||||
|   return cmd |   xcmd = '(echo {:}; {:}; sleep 0.1s)'.format(prefix, cmd) | ||||||
|  |   return xcmd | ||||||
|  |  | ||||||
|  |  | ||||||
| def main(source, destination, num_process): | def main(source, destination, xtype): | ||||||
|   assert source.exists(), '{:} does not exist'.format(source) |   assert source.exists(), '{:} does not exist'.format(source) | ||||||
|   assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source) |   assert (source/'train'  ).exists(), '{:}/train does not exist'.format(source) | ||||||
|   assert (source/'val.tar').exists(), '{:}/val   does not exist'.format(source) |   if xtype == 'tar'  : assert (source/'val.tar').exists(), '{:}/val   does not exist'.format(source) | ||||||
|   assert num_process > 0, 'invalid num_process : {:}'.format(num_process) |   elif xtype == 'zip': assert (source/'val.zip').exists(), '{:}/val   does not exist'.format(source) | ||||||
|  |   else               : raise ValueError('invalid unzip type : {:}'.format(xtype)) | ||||||
|  |   #assert num_process > 0, 'invalid num_process : {:}'.format(num_process) | ||||||
|   source      = source.resolve() |   source      = source.resolve() | ||||||
|   destination = destination.resolve() |   destination = destination.resolve() | ||||||
|   destination.mkdir(parents=True, exist_ok=True) |   destination.mkdir(parents=True, exist_ok=True) | ||||||
| @@ -33,11 +37,15 @@ def main(source, destination, num_process): | |||||||
|   subdirs = list( (source / 'train').glob('n*') ) |   subdirs = list( (source / 'train').glob('n*') ) | ||||||
|   all_commands = [] |   all_commands = [] | ||||||
|   assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) ) |   assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) ) | ||||||
|   cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination)) |   if xtype == 'tar'  : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination)) | ||||||
|  |   elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip')) | ||||||
|  |   else               : raise ValueError('invalid unzip type : {:}'.format(xtype)) | ||||||
|   all_commands.append( cmd ) |   all_commands.append( cmd ) | ||||||
|   for idx, subdir in enumerate(subdirs): |   for idx, subdir in enumerate(subdirs): | ||||||
|     name = subdir.name |     name = subdir.name | ||||||
|     cmd  = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train')) |     if xtype == 'tar'  : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train')) | ||||||
|  |     elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name))) | ||||||
|  |     else               : raise ValueError('invalid unzip type : {:}'.format(xtype)) | ||||||
|     all_commands.append( cmd ) |     all_commands.append( cmd ) | ||||||
|   #print ('Collect all commands done : {:} lines'.format( len(all_commands) )) |   #print ('Collect all commands done : {:} lines'.format( len(all_commands) )) | ||||||
|  |  | ||||||
| @@ -61,5 +69,5 @@ def main(source, destination, num_process): | |||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|   assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv) |   assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv) | ||||||
|   source, destination = Path(sys.argv[1]), Path(sys.argv[2]) |   source, destination = Path(sys.argv[1]), Path(sys.argv[2]) | ||||||
|   num_process = int(sys.argv[3]) |   #num_process = int(sys.argv[3]) | ||||||
|   main(source, destination, num_process) |   main(source, destination, sys.argv[3]) | ||||||
|   | |||||||
| @@ -22,7 +22,7 @@ TIME=$(date +"%Y-%h-%d--%T") | |||||||
| TIME="${TIME//:/-}" | TIME="${TIME//:/-}" | ||||||
|  |  | ||||||
| JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh" | JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh" | ||||||
| HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${TIME}" | HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${NAME}-${TIME}" | ||||||
| echo "JOB-SCRIPT: "${JOB_SCRIPT} | echo "JOB-SCRIPT: "${JOB_SCRIPT} | ||||||
|  |  | ||||||
| cat ${FDIR}/job-script.sh > ${JOB_SCRIPT} | cat ${FDIR}/job-script.sh > ${JOB_SCRIPT} | ||||||
|   | |||||||
| @@ -27,7 +27,9 @@ else | |||||||
|   echo "Unzip ILSVRC2012" |   echo "Unzip ILSVRC2012" | ||||||
|   tar --version |   tar --version | ||||||
|   #tar xf ./hadoop-data/ILSVRC2012.tar   -C ${TORCH_HOME} |   #tar xf ./hadoop-data/ILSVRC2012.tar   -C ${TORCH_HOME} | ||||||
|   ${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 5 | bash |   #${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ./data/data/get_imagenet.sh | ||||||
|  |   ${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh | ||||||
|  |   bash ./data/data/get_imagenet.sh | ||||||
|   echo "Unzip ILSVRC2012 done" |   echo "Unzip ILSVRC2012 done" | ||||||
| fi | fi | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user