Clean unnecessary files
@@ -1,122 +0,0 @@
import os
import torch

from collections import Counter


class Dictionary(object):
  def __init__(self):
    self.word2idx = {}
    self.idx2word = []
    self.counter = Counter()
    self.total = 0

  def add_word(self, word):
    if word not in self.word2idx:
      self.idx2word.append(word)
      self.word2idx[word] = len(self.idx2word) - 1
    token_id = self.word2idx[word]
    self.counter[token_id] += 1
    self.total += 1
    return self.word2idx[word]

  def __len__(self):
    return len(self.idx2word)


class Corpus(object):
  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train = self.tokenize(os.path.join(path, 'train.txt'))
    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
    self.test = self.tokenize(os.path.join(path, 'test.txt'))

  def tokenize(self, path):
    """Tokenizes a text file into one flat LongTensor of word indices."""
    assert os.path.exists(path), '{:} does not exist'.format(path)
    # First pass: add words to the dictionary and count the tokens
    with open(path, 'r', encoding='utf-8') as f:
      tokens = 0
      for line in f:
        words = line.split() + ['<eos>']
        tokens += len(words)
        for word in words:
          self.dictionary.add_word(word)

    # Second pass: map every word to its index
    with open(path, 'r', encoding='utf-8') as f:
      ids = torch.LongTensor(tokens)
      token = 0
      for line in f:
        words = line.split() + ['<eos>']
        for word in words:
          ids[token] = self.dictionary.word2idx[word]
          token += 1

    return ids


class SentCorpus(object):
  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train = self.tokenize(os.path.join(path, 'train.txt'))
    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
    self.test = self.tokenize(os.path.join(path, 'test.txt'))

  def tokenize(self, path):
    """Tokenizes a text file into a list of per-sentence LongTensors."""
    assert os.path.exists(path), '{:} does not exist'.format(path)
    # First pass: add words to the dictionary
    with open(path, 'r', encoding='utf-8') as f:
      for line in f:
        words = line.split() + ['<eos>']
        for word in words:
          self.dictionary.add_word(word)

    # Second pass: build one index tensor per sentence, skipping blank lines
    sents = []
    with open(path, 'r', encoding='utf-8') as f:
      for line in f:
        if not line.strip():
          continue
        words = line.split() + ['<eos>']
        sent = torch.LongTensor(len(words))
        for i, word in enumerate(words):
          sent[i] = self.dictionary.word2idx[word]
        sents.append(sent)

    return sents


class BatchSentLoader(object):
  def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
    self.sents = sents
    self.batch_size = batch_size
    self.sort_sents = sorted(sents, key=lambda x: x.size(0))
    self.cuda = cuda
    self.volatile = volatile
    self.pad_id = pad_id
    self.idx = 0

  def __next__(self):
    if self.idx >= len(self.sort_sents):
      raise StopIteration

    # Take the next (up to) batch_size sentences and pad them to the longest one
    batch_size = min(self.batch_size, len(self.sort_sents) - self.idx)
    batch = self.sort_sents[self.idx:self.idx + batch_size]
    max_len = max([s.size(0) for s in batch])
    tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
    for i in range(len(batch)):
      s = batch[i]
      tensor[:s.size(0), i].copy_(s)
    if self.cuda:
      tensor = tensor.cuda()

    self.idx += batch_size

    return tensor

  next = __next__  # Python 2 compatibility

  def __iter__(self):
    self.idx = 0
    return self
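For reference, a minimal sketch of how the Corpus output is typically consumed for language modelling: the batchify helper, data path, and batch size below are illustrative assumptions, not part of the removed file.

# Hypothetical usage sketch for the removed Corpus class.
import torch

def batchify(data, batch_size):
  # Drop tokens that would not fill a complete column, then reshape so each
  # column is one contiguous stream of tokens.
  n_batch = data.size(0) // batch_size
  data = data.narrow(0, 0, n_batch * batch_size)
  return data.view(batch_size, -1).t().contiguous()

corpus = Corpus('../../data/data/penn')   # assumed PTB-style directory layout
train_data = batchify(corpus.train, 20)   # LongTensor of shape (n_batch, 20)
print(train_data.size(), len(corpus.dictionary))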
@@ -1,65 +0,0 @@
# coding=utf-8
import numpy as np
import torch


class MetaBatchSampler(object):

  def __init__(self, labels, classes_per_it, num_samples, iterations):
    '''
    Initialize MetaBatchSampler
    Args:
    - labels: an iterable containing all the labels for the current dataset;
      sample indexes will be inferred from this iterable.
    - classes_per_it: number of random classes for each iteration
    - num_samples: number of samples per class for each iteration (support + query)
    - iterations: number of iterations (episodes) per epoch
    '''
    super(MetaBatchSampler, self).__init__()
    self.labels           = labels.copy()
    self.classes_per_it   = classes_per_it
    self.sample_per_class = num_samples
    self.iterations       = iterations

    self.classes, self.counts = np.unique(self.labels, return_counts=True)
    assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
    assert classes_per_it < len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
    self.classes = torch.LongTensor(self.classes)

    # For every class c, store a LongTensor with the indices of the samples
    # belonging to c; episodes are drawn from these index lists.
    self.indexes = { x.item() : [] for x in self.classes }
    indexes = { x.item() : [] for x in self.classes }

    for idx, label in enumerate(self.labels):
      indexes[ label.item() ].append( idx )
    for key, value in indexes.items():
      self.indexes[ key ] = torch.LongTensor( value )


  def __iter__(self):
    # yield one batch of sample indices per episode
    spc = self.sample_per_class
    cpi = self.classes_per_it

    for it in range(self.iterations):
      batch_size = spc * cpi
      batch = torch.LongTensor(batch_size)
      assert cpi < len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
      c_idxs = torch.randperm(len(self.classes))[:cpi]

      for i, cls in enumerate(self.classes[c_idxs]):
        s = slice(i * spc, (i + 1) * spc)
        num = self.indexes[ cls.item() ].nelement()
        assert spc < num, '{:} vs. {:}'.format(spc, num)
        sample_idxs = torch.randperm( num )[:spc]
        batch[s] = self.indexes[ cls.item() ][sample_idxs]

      batch = batch[torch.randperm(len(batch))]
      yield batch

  def __len__(self):
    # the number of iterations (episodes) per epoch
    return self.iterations
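For reference, a minimal smoke-test sketch for the removed MetaBatchSampler using synthetic labels; all class counts and episode sizes below are assumptions.

# Hypothetical smoke test: 5 classes with 20 samples each, 3 classes per
# episode, 4 samples per class, 2 episodes per epoch.
import numpy as np

labels = np.repeat(np.arange(5), 20)   # 100 labels: classes 0..4, 20 samples each
sampler = MetaBatchSampler(labels, classes_per_it=3, num_samples=4, iterations=2)
for episode in sampler:
  print(episode.size())                # torch.Size([12]): 3 classes * 4 shuffled indices
print(len(sampler))                    # 2 episodes per epoch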
@@ -1,84 +0,0 @@
from __future__ import print_function
import numpy as np
from PIL import Image
import pickle as pkl
import os, cv2, csv, glob
import torch
import torch.utils.data as data


class TieredImageNet(data.Dataset):

  def __init__(self, root_dir, split, transform=None):
    self.split = split
    self.root_dir = root_dir
    self.transform = transform
    splits = split.split('-')

    images, labels, last = [], [], 0
    for split in splits:
      labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, split)
      images_name = '{:}/{:}_images.npz'.format(self.root_dir, split)
      # decompress the png pickle into an npz file if the npz does not exist yet
      if not os.path.exists(images_name):
        png_pkl = images_name[:-4] + '_png.pkl'
        if os.path.exists(png_pkl):
          decompress(images_name, png_pkl)
        else:
          raise ValueError('png_pkl {:} does not exist'.format( png_pkl ))
      assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
      print ("Prepare {:} done".format(images_name))
      try:
        with open(labels_name) as f:
          data = pkl.load(f)
          label_specific = data["label_specific"]
      except Exception:
        with open(labels_name, 'rb') as f:
          data = pkl.load(f, encoding='bytes')
          label_specific = data[b'label_specific']
      with np.load(images_name, mmap_mode="r", encoding='latin1') as data:
        image_data = data["images"]
      images.append( image_data )
      # offset the labels of this split so that labels stay unique across splits
      label_specific = label_specific + last
      labels.append( label_specific )
      last = np.max(label_specific) + 1
      print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
    images, labels = np.concatenate(images), np.concatenate(labels)

    self.images = images
    self.labels = labels
    self.n_classes = int( np.max(labels) + 1 )
    self.dict_index_label = {}
    for cls in range(self.n_classes):
      idxs = np.where(labels==cls)[0]
      self.dict_index_label[cls] = idxs
    self.length = len(labels)
    print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))

  def __repr__(self):
    return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))

  def __len__(self):
    return self.length

  def __getitem__(self, index):
    assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
    image = self.images[index].copy()
    label = int(self.labels[index])
    # stored arrays are BGR (OpenCV order); flip channels to build an RGB PIL image
    image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
    if self.transform is not None:
      image = self.transform( image )
    return image, label


def decompress(path, output):
  # read the pickled list of png-encoded images and save them as one npz array
  with open(output, 'rb') as f:
    array = pkl.load(f, encoding='bytes')
  images = np.zeros([len(array), 84, 84, 3], dtype=np.uint8)
  for ii, item in enumerate(array):
    im = cv2.imdecode(item, 1)
    images[ii] = im
  np.savez(path, images=images)
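For reference, a hedged preprocessing sketch showing how the *_images_png.pkl archives could be converted up front with the decompress helper above, instead of lazily inside __init__; the root directory follows the TORCH_HOME convention used in the test script further down, and the split names are assumptions.

# Hypothetical one-off conversion of the png pickles into the npz files that
# TieredImageNet expects; decompress(npz_target, png_source) as defined above.
import os

root = os.environ['TORCH_HOME'] + '/tiered-imagenet'   # assumed data location
for split in ('train', 'val', 'test'):
  npz = '{:}/{:}_images.npz'.format(root, split)
  png = '{:}/{:}_images_png.pkl'.format(root, split)
  if not os.path.exists(npz) and os.path.exists(png):
    decompress(npz, png)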
@@ -1,7 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .MetaBatchSampler import MetaBatchSampler
from .TieredImageNet import TieredImageNet
from .LanguageDataset import Corpus
from .get_dataset_with_transform import get_datasets
@@ -1,77 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os, sys, torch
import os.path as osp
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms

from utils import Cutout
from .TieredImageNet import TieredImageNet


Dataset2Class = {'cifar10' : 10,
                 'cifar100': 100,
                 'tiered'  : -1,
                 'imagenet-1k' : 1000,
                 'imagenet-100': 100}


def get_datasets(name, root, cutout):

  # Mean + Std
  if name == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif name == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  elif name == 'tiered':
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
  else: raise TypeError("Unknown dataset : {:}".format(name))

  # Data Augmentation
  if name == 'cifar10' or name == 'cifar100':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    if cutout > 0 : lists += [Cutout(cutout)]
    train_transform = transforms.Compose(lists)
    test_transform  = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
  elif name == 'tiered':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
    if cutout > 0 : lists += [Cutout(cutout)]
    train_transform = transforms.Compose(lists)
    test_transform  = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)])
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
      transforms.RandomResizedCrop(224),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.2),
      transforms.ToTensor(),
      normalize,
    ])
    test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
  else: raise TypeError("Unknown dataset : {:}".format(name))

  if name == 'cifar10':
    train_data = dset.CIFAR10 (root, train=True , transform=train_transform, download=True)
    test_data  = dset.CIFAR10 (root, train=False, transform=test_transform , download=True)
  elif name == 'cifar100':
    train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
    test_data  = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
    test_data  = dset.ImageFolder(osp.join(root, 'val'),   test_transform)
  else: raise TypeError("Unknown dataset : {:}".format(name))

  class_num = Dataset2Class[name]
  return train_data, test_data, class_num
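For reference, a minimal usage sketch of the removed get_datasets helper; the root path, cutout length, and loader settings are assumptions.

# Hypothetical usage: CIFAR-10 with cutout length 16, wrapped in data loaders.
import torch

train_data, test_data, class_num = get_datasets('cifar10', './data/cifar10', cutout=16)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True , num_workers=4)
test_loader  = torch.utils.data.DataLoader(test_data , batch_size=256, shuffle=False, num_workers=4)
print('classes = {:}, train batches = {:}'.format(class_num, len(train_loader)))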
@@ -1,10 +0,0 @@
import os, sys, torch

from LanguageDataset import SentCorpus, BatchSentLoader

if __name__ == '__main__':
  path = '../../data/data/penn'
  corpus = SentCorpus( path )
  loader = BatchSentLoader(corpus.test, 10)
  for i, d in enumerate(loader):
    print('{:} :: {:}'.format(i, d.size()))
@@ -1,33 +0,0 @@
import os, sys, torch
import torchvision.transforms as transforms

from TieredImageNet import TieredImageNet
from MetaBatchSampler import MetaBatchSampler

root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
print ('root : {:}'.format(root_dir))
means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(84, padding=8), transforms.ToTensor(), transforms.Normalize(means, stds)]
transform = transforms.Compose(lists)

dataset = TieredImageNet(root_dir, 'val-test', transform)
image, label = dataset[111]
print ('image shape = {:}, label = {:}'.format(image.size(), label))
print ('image : min = {:}, max = {:}    ||| label : {:}'.format(image.min(), image.max(), label))


sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)

dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)

print ('the length of dataset : {:}'.format( len(dataset) ))
print ('the length of loader  : {:}'.format( len(dataloader) ))

for images, labels in dataloader:
  print ('images : {:}'.format( images.size() ))
  print ('labels : {:}'.format( labels.size() ))
  for i in range(3):
    print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, images[:,i].min(), images[:,i].max(), images[:,i].mean(), images[:,i].std()))

print('-----')