update load_data_CUHK-PEDES.py and ImageNet-logs

This commit is contained in:
Xuanyi Dong 2019-04-04 16:51:45 +08:00
parent d9026be4b2
commit 666c105f51
6 changed files with 15966 additions and 20 deletions

View File

@ -1,4 +1,4 @@
## Searching for A Robust Neural Architecture in Four GPU Hours
## [Searching for A Robust Neural Architecture in Four GPU Hours](http://xuanyidong.com/publication/gradient-based-diff-sampler/)
We propose a Gradient-based neural architecture search approach using a Differentiable Architecture Sampler (GDAS).

View File

@ -16,10 +16,21 @@ def execute(cmds, idx, num):
def command(prefix, cmd):
  #print ('{:}{:}'.format(prefix, cmd))
  #if execute: os.system(cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
  xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  return xcmd
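# For reference, command() only builds a string; nothing is executed here.
# e.g. command('demo:', 'tar -xf a.tar -C train')  (made-up arguments) returns
#   (echo demo: $(date +"%Y-%h-%d--%T") "PID:"$$; tar -xf a.tar -C train; sleep 0.1s)
# i.e. a self-contained shell snippet that logs a timestamp and the worker PID,
# runs the wrapped command, then sleeps briefly.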
def mkILSVRC2012(destination):
  # wipe any previous (partial) extraction and recreate destination/train from scratch
  destination = destination.resolve()
  destination.mkdir(parents=True, exist_ok=True)
  os.system('rm -rf {:}'.format(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)

def main(source, destination, xtype):
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train' ).exists(), '{:}/train does not exist'.format(source)
@ -28,25 +39,21 @@ def main(source, destination, xtype):
  else : raise ValueError('invalid unzip type : {:}'.format(xtype))
  #assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
  source = source.resolve()
  mkILSVRC2012(destination)
  subdirs = list( (source / 'train').glob('n*') )
  all_commands = []
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  if xtype == 'tar' : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
  elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
  else : raise ValueError('invalid unzip type : {:}'.format(xtype))
  all_commands.append( cmd )
  for idx, subdir in enumerate(subdirs):
    name = subdir.name
    if xtype == 'tar' : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
    elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
    else : raise ValueError('invalid unzip type : {:}'.format(xtype))
    all_commands.append( cmd )
  #print ('Collect all commands done : {:} lines'.format( len(all_commands) ))
  for i, cmd in enumerate(all_commands):
@ -70,4 +77,18 @@ if __name__ == '__main__':
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  #num_process = int(sys.argv[3])
  if sys.argv[3] == 'wget':
    with open(source) as f:
      content = f.readlines()
    content = [x.strip() for x in content]
    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
    mkILSVRC2012(destination)
    all_commands = []
    cmd = command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, destination / 'val.tar', destination, destination / 'val.tar'))
    all_commands.append(cmd)
    for idx, name in enumerate(content):
      cmd = command('{:03d}/{:03d}-th: '.format(idx, len(content)), 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, destination / 'train', destination / 'train' / name, destination / 'train', destination / 'train' / name))
      all_commands.append(cmd)
    for i, cmd in enumerate(all_commands): print(cmd)
  else:
    main(source, destination, sys.argv[3])
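# Usage note: every mode only PRINTS one shell command per line to stdout; a
# driver script is expected to redirect and execute them. The invocations that
# appear (some commented out) in the cluster scripts below are:
#   ${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar
#   ${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget
# where in 'wget' mode the source argument is a 1000-line class-list file
# rather than a directory of archives.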

data/load_data_CUHK-PEDES.py (new executable file, +15 lines)
View File

@ -0,0 +1,15 @@
import json

def main():
  xpath = 'caption_all.json'
  with open(xpath, 'r') as cfile:
    cap_data = json.load(cfile)
  print ('There are {:} images'.format( len(cap_data) ))
  IDs = set()
  for idx, data in enumerate( cap_data ):
    IDs.add( data['id'] )
    assert len( data['captions'] ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, data['captions'], len(data['captions']))
  print ('IDs :: min={:}, max={:}, num={:}'.format(min(IDs), max(IDs), len(IDs)))

if __name__ == '__main__':
  main()
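# The checks above assume caption_all.json holds a list of per-image records,
# each with a person "id" and a non-empty "captions" list; several images may
# share one id, hence the set. A minimal sketch (field values are invented):
#   [ {"id": 1, "captions": ["a woman in a red coat and black trousers"]},
#     {"id": 1, "captions": ["the woman carries a white handbag"]},
#     {"id": 2, "captions": ["a man in a gray t-shirt and blue jeans"]} ]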

File diff suppressed because it is too large

View File

@ -1,13 +1,15 @@
#!/bin/bash
#
echo "CHECK-DATA-DIR START"
sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
COMM_KM_Data COMM_km_2018 \
`pwd`/hadoop-data \
afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
#sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
# COMM_KM_Data COMM_km_2018 \
# `pwd`/hadoop-data \
# afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
export TORCH_HOME="./data/data/"
tar xvf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
tar xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
rm ${TORCH_HOME}/cifar.python.tar
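# the archive is now fetched over HTTP instead of read from the AFS mount
# (commented out above), unpacked into TORCH_HOME, and deleted to save disk space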
#tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
cifar_dir="${TORCH_HOME}/cifar.python"

View File

@ -29,22 +29,35 @@ else
tar --version
#tar xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
commands="./data/data/get_imagenet.sh"
${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
#bash ./data/data/get_imagenet.sh
#count=0
#while read -r line; do
# temp_file="./data/data/TEMP-${count}.sh"
# echo "${line}" > ${temp_file}
# bash ${temp_file}
# count=$((count+1))
#${PY_C} ./data/ps_mem.py -p $$
# free -g
#done < "${commands}"
#wget http://10.127.2.44:8000/ILSVRC2012.tar --directory-prefix=${TORCH_HOME}
${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
count=0
while read -r line; do
  temp_file="./data/data/TEMP-${count}.sh"
  echo "${line}" > ${temp_file}
  bash ${temp_file}
  count=$((count+1))
  #${PY_C} ./data/ps_mem.py -p $$
  #free -g
done < "${commands}"
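# each generated command is written to its own TEMP-*.sh and run in a separate
# bash child, so the memory of one extraction is released before the next one
# starts; the commented ps_mem.py / free lines are memory probes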
#echo "Copy ILSVRC2012 done"
#tar -xvf ${TORCH_HOME}/ILSVRC2012.tar -C ${TORCH_HOME}
#rm ${TORCH_HOME}/ILSVRC2012.tar
echo "Unzip ILSVRC2012 done"
fi
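# NOTE: early exit; the training command below does not run in this revision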
exit 1
${PY_C} --version
${PY_C} ./exps-cnn/train_base.py \