update load_data_CUHK-PEDES.py and ImageNet-logs

This commit is contained in:
Xuanyi Dong 2019-04-04 16:51:45 +08:00
parent d9026be4b2
commit 666c105f51
6 changed files with 15966 additions and 20 deletions

View File

@ -1,4 +1,4 @@
## Searching for A Robust Neural Architecture in Four GPU Hours
## [Searching for A Robust Neural Architecture in Four GPU Hours](http://xuanyidong.com/publication/gradient-based-diff-sampler/)
We propose a Gradient-based neural architecture search approach using a Differentiable Architecture Sampler (GDAS).

View File

@ -16,10 +16,21 @@ def execute(cmds, idx, num):
def command(prefix, cmd):
  #print ('{:}{:}'.format(prefix, cmd))
  #if execute: os.system(cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  #xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
  xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
  return xcmd
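# For reference, command() only builds a string; nothing is executed here.
# e.g. command('demo:', 'tar -xf a.tar -C train')  (made-up arguments) returns
#   (echo demo: $(date +"%Y-%h-%d--%T") "PID:"$$; tar -xf a.tar -C train; sleep 0.1s)
# i.e. a self-contained shell snippet that logs a timestamp and the worker PID,
# runs the wrapped command, then sleeps briefly.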
def mkILSVRC2012(destination):
  # wipe any previous (partial) extraction and recreate destination/train from scratch
  destination = destination.resolve()
  destination.mkdir(parents=True, exist_ok=True)
  os.system('rm -rf {:}'.format(destination))
  destination.mkdir(parents=True, exist_ok=True)
  (destination/'train').mkdir(parents=True, exist_ok=True)

def main(source, destination, xtype):
  assert source.exists(), '{:} does not exist'.format(source)
  assert (source/'train' ).exists(), '{:}/train does not exist'.format(source)
@ -28,25 +39,21 @@ def main(source, destination, xtype):
  else : raise ValueError('invalid unzip type : {:}'.format(xtype))
  #assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
  source = source.resolve()
  mkILSVRC2012(destination)
  subdirs = list( (source / 'train').glob('n*') )
  all_commands = []
  assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
  if xtype == 'tar' : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
  elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
  else : raise ValueError('invalid unzip type : {:}'.format(xtype))
  all_commands.append( cmd )
  for idx, subdir in enumerate(subdirs):
    name = subdir.name
    if xtype == 'tar' : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
    elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
    else : raise ValueError('invalid unzip type : {:}'.format(xtype))
    all_commands.append( cmd )
  #print ('Collect all commands done : {:} lines'.format( len(all_commands) ))
  for i, cmd in enumerate(all_commands):
@ -70,4 +77,18 @@ if __name__ == '__main__':
  assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
  source, destination = Path(sys.argv[1]), Path(sys.argv[2])
  #num_process = int(sys.argv[3])
  if sys.argv[3] == 'wget':
    with open(source) as f:
      content = f.readlines()
    content = [x.strip() for x in content]
    assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
    mkILSVRC2012(destination)
    all_commands = []
    cmd = command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, destination / 'val.tar', destination, destination / 'val.tar'))
    all_commands.append(cmd)
    for idx, name in enumerate(content):
      cmd = command('{:03d}/{:03d}-th: '.format(idx, len(content)), 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, destination / 'train', destination / 'train' / name, destination / 'train', destination / 'train' / name))
      all_commands.append(cmd)
    for i, cmd in enumerate(all_commands): print(cmd)
  else:
    main(source, destination, sys.argv[3])
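# Usage note: every mode only PRINTS one shell command per line to stdout; a
# driver script is expected to redirect and execute them. The invocations that
# appear (some commented out) in the cluster scripts below are:
#   ${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar
#   ${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget
# where in 'wget' mode the source argument is a 1000-line class-list file
# rather than a directory of archives.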

data/load_data_CUHK-PEDES.py (new executable file, +15 lines)
View File

@ -0,0 +1,15 @@
import json

def main():
  xpath = 'caption_all.json'
  with open(xpath, 'r') as cfile:
    cap_data = json.load(cfile)
  print ('There are {:} images'.format( len(cap_data) ))
  IDs = set()
  for idx, data in enumerate( cap_data ):
    IDs.add( data['id'] )
    assert len( data['captions'] ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, data['captions'], len(data['captions']))
  print ('IDs :: min={:}, max={:}, num={:}'.format(min(IDs), max(IDs), len(IDs)))

if __name__ == '__main__':
  main()
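# The checks above assume caption_all.json holds a list of per-image records,
# each with a person "id" and a non-empty "captions" list; several images may
# share one id, hence the set. A minimal sketch (field values are invented):
#   [ {"id": 1, "captions": ["a woman in a red coat and black trousers"]},
#     {"id": 1, "captions": ["the woman carries a white handbag"]},
#     {"id": 2, "captions": ["a man in a gray t-shirt and blue jeans"]} ]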

File diff suppressed because it is too large

View File

@ -1,13 +1,15 @@
#!/bin/bash
#
echo "CHECK-DATA-DIR START"
sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
COMM_KM_Data COMM_km_2018 \
`pwd`/hadoop-data \
afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
#sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
# COMM_KM_Data COMM_km_2018 \
# `pwd`/hadoop-data \
# afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
export TORCH_HOME="./data/data/"
tar xvf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
tar xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
rm ${TORCH_HOME}/cifar.python.tar
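# the archive is now fetched over HTTP instead of read from the AFS mount
# (commented out above), unpacked into TORCH_HOME, and deleted to save disk space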
#tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
cifar_dir="${TORCH_HOME}/cifar.python"

View File

@ -29,22 +29,35 @@ else
tar --version
#tar xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
commands="./data/data/get_imagenet.sh"
${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
#bash ./data/data/get_imagenet.sh
#count=0
#while read -r line; do
# temp_file="./data/data/TEMP-${count}.sh"
# echo "${line}" > ${temp_file}
# bash ${temp_file}
# count=$((count+1))
#${PY_C} ./data/ps_mem.py -p $$
# free -g
#done < "${commands}"
#wget http://10.127.2.44:8000/ILSVRC2012.tar --directory-prefix=${TORCH_HOME}
${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
count=0
while read -r line; do
  temp_file="./data/data/TEMP-${count}.sh"
  echo "${line}" > ${temp_file}
  bash ${temp_file}
  count=$((count+1))
  #${PY_C} ./data/ps_mem.py -p $$
  #free -g
done < "${commands}"
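# each generated command is written to its own TEMP-*.sh and run in a separate
# bash child, so the memory of one extraction is released before the next one
# starts; the commented ps_mem.py / free lines are memory probes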
#echo "Copy ILSVRC2012 done"
#tar -xvf ${TORCH_HOME}/ILSVRC2012.tar -C ${TORCH_HOME}
#rm ${TORCH_HOME}/ILSVRC2012.tar
echo "Unzip ILSVRC2012 done"
fi
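# NOTE: early exit; the training command below does not run in this revision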
exit 1
${PY_C} --version
${PY_C} ./exps-cnn/train_base.py \