Clean unnecessary files
@@ -1,122 +0,0 @@
import os
import torch

from collections import Counter


class Dictionary(object):
  def __init__(self):
    self.word2idx = {}
    self.idx2word = []
    self.counter = Counter()
    self.total = 0

  def add_word(self, word):
    if word not in self.word2idx:
      self.idx2word.append(word)
      self.word2idx[word] = len(self.idx2word) - 1
    token_id = self.word2idx[word]
    self.counter[token_id] += 1
    self.total += 1
    return self.word2idx[word]

  def __len__(self):
    return len(self.idx2word)


class Corpus(object):
  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train = self.tokenize(os.path.join(path, 'train.txt'))
    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
    self.test = self.tokenize(os.path.join(path, 'test.txt'))

  def tokenize(self, path):
    """Tokenizes a text file into one flat LongTensor of word indices."""
    assert os.path.exists(path), '{:} does not exist'.format(path)
    # First pass: add words to the dictionary and count the tokens
    with open(path, 'r', encoding='utf-8') as f:
      tokens = 0
      for line in f:
        words = line.split() + ['<eos>']
        tokens += len(words)
        for word in words:
          self.dictionary.add_word(word)

    # Second pass: map every word to its index
    with open(path, 'r', encoding='utf-8') as f:
      ids = torch.LongTensor(tokens)
      token = 0
      for line in f:
        words = line.split() + ['<eos>']
        for word in words:
          ids[token] = self.dictionary.word2idx[word]
          token += 1

    return ids


class SentCorpus(object):
  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train = self.tokenize(os.path.join(path, 'train.txt'))
    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
    self.test = self.tokenize(os.path.join(path, 'test.txt'))

  def tokenize(self, path):
    """Tokenizes a text file into a list of per-sentence LongTensors."""
    assert os.path.exists(path), '{:} does not exist'.format(path)
    # First pass: add words to the dictionary
    with open(path, 'r', encoding='utf-8') as f:
      for line in f:
        words = line.split() + ['<eos>']
        for word in words:
          self.dictionary.add_word(word)

    # Second pass: build one index tensor per sentence, skipping blank lines
    sents = []
    with open(path, 'r', encoding='utf-8') as f:
      for line in f:
        if not line.strip():
          continue
        words = line.split() + ['<eos>']
        sent = torch.LongTensor(len(words))
        for i, word in enumerate(words):
          sent[i] = self.dictionary.word2idx[word]
        sents.append(sent)

    return sents


class BatchSentLoader(object):
  def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
    self.sents = sents
    self.batch_size = batch_size
    self.sort_sents = sorted(sents, key=lambda x: x.size(0))
    self.cuda = cuda
    self.volatile = volatile
    self.pad_id = pad_id
    self.idx = 0

  def __next__(self):
    if self.idx >= len(self.sort_sents):
      raise StopIteration

    # Take the next (up to) batch_size sentences and pad them to the longest one
    batch_size = min(self.batch_size, len(self.sort_sents) - self.idx)
    batch = self.sort_sents[self.idx:self.idx + batch_size]
    max_len = max([s.size(0) for s in batch])
    tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
    for i in range(len(batch)):
      s = batch[i]
      tensor[:s.size(0), i].copy_(s)
    if self.cuda:
      tensor = tensor.cuda()

    self.idx += batch_size

    return tensor

  next = __next__  # Python 2 compatibility

  def __iter__(self):
    self.idx = 0
    return self
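For reference, a minimal sketch of how the Corpus output is typically consumed for language modelling: the batchify helper, data path, and batch size below are illustrative assumptions, not part of the removed file.

# Hypothetical usage sketch for the removed Corpus class.
import torch

def batchify(data, batch_size):
  # Drop tokens that would not fill a complete column, then reshape so each
  # column is one contiguous stream of tokens.
  n_batch = data.size(0) // batch_size
  data = data.narrow(0, 0, n_batch * batch_size)
  return data.view(batch_size, -1).t().contiguous()

corpus = Corpus('../../data/data/penn')   # assumed PTB-style directory layout
train_data = batchify(corpus.train, 20)   # LongTensor of shape (n_batch, 20)
print(train_data.size(), len(corpus.dictionary))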
@@ -1,65 +0,0 @@
# coding=utf-8
import numpy as np
import torch


class MetaBatchSampler(object):

  def __init__(self, labels, classes_per_it, num_samples, iterations):
    '''
    Initialize MetaBatchSampler
    Args:
    - labels: an iterable containing all the labels for the current dataset;
      sample indexes will be inferred from this iterable.
    - classes_per_it: number of random classes for each iteration
    - num_samples: number of samples per class for each iteration (support + query)
    - iterations: number of iterations (episodes) per epoch
    '''
    super(MetaBatchSampler, self).__init__()
    self.labels           = labels.copy()
    self.classes_per_it   = classes_per_it
    self.sample_per_class = num_samples
    self.iterations       = iterations

    self.classes, self.counts = np.unique(self.labels, return_counts=True)
    assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
    assert classes_per_it < len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
    self.classes = torch.LongTensor(self.classes)

    # For every class c, store a LongTensor with the indices of the samples
    # belonging to c; episodes are drawn from these index lists.
    self.indexes = { x.item() : [] for x in self.classes }
    indexes = { x.item() : [] for x in self.classes }

    for idx, label in enumerate(self.labels):
      indexes[ label.item() ].append( idx )
    for key, value in indexes.items():
      self.indexes[ key ] = torch.LongTensor( value )


  def __iter__(self):
    # yield one batch of sample indices per episode
    spc = self.sample_per_class
    cpi = self.classes_per_it

    for it in range(self.iterations):
      batch_size = spc * cpi
      batch = torch.LongTensor(batch_size)
      assert cpi < len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
      c_idxs = torch.randperm(len(self.classes))[:cpi]

      for i, cls in enumerate(self.classes[c_idxs]):
        s = slice(i * spc, (i + 1) * spc)
        num = self.indexes[ cls.item() ].nelement()
        assert spc < num, '{:} vs. {:}'.format(spc, num)
        sample_idxs = torch.randperm( num )[:spc]
        batch[s] = self.indexes[ cls.item() ][sample_idxs]

      batch = batch[torch.randperm(len(batch))]
      yield batch

  def __len__(self):
    # the number of iterations (episodes) per epoch
    return self.iterations
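For reference, a minimal smoke-test sketch for the removed MetaBatchSampler using synthetic labels; all class counts and episode sizes below are assumptions.

# Hypothetical smoke test: 5 classes with 20 samples each, 3 classes per
# episode, 4 samples per class, 2 episodes per epoch.
import numpy as np

labels = np.repeat(np.arange(5), 20)   # 100 labels: classes 0..4, 20 samples each
sampler = MetaBatchSampler(labels, classes_per_it=3, num_samples=4, iterations=2)
for episode in sampler:
  print(episode.size())                # torch.Size([12]): 3 classes * 4 shuffled indices
print(len(sampler))                    # 2 episodes per epoch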
@@ -1,84 +0,0 @@
from __future__ import print_function
import numpy as np
from PIL import Image
import pickle as pkl
import os, cv2, csv, glob
import torch
import torch.utils.data as data


class TieredImageNet(data.Dataset):

  def __init__(self, root_dir, split, transform=None):
    self.split = split
    self.root_dir = root_dir
    self.transform = transform
    splits = split.split('-')

    images, labels, last = [], [], 0
    for split in splits:
      labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, split)
      images_name = '{:}/{:}_images.npz'.format(self.root_dir, split)
      # decompress the png pickle into an npz file if the npz does not exist yet
      if not os.path.exists(images_name):
        png_pkl = images_name[:-4] + '_png.pkl'
        if os.path.exists(png_pkl):
          decompress(images_name, png_pkl)
        else:
          raise ValueError('png_pkl {:} does not exist'.format( png_pkl ))
      assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
      print ("Prepare {:} done".format(images_name))
      try:
        with open(labels_name) as f:
          data = pkl.load(f)
          label_specific = data["label_specific"]
      except Exception:
        with open(labels_name, 'rb') as f:
          data = pkl.load(f, encoding='bytes')
          label_specific = data[b'label_specific']
      with np.load(images_name, mmap_mode="r", encoding='latin1') as data:
        image_data = data["images"]
      images.append( image_data )
      # offset the labels of this split so that labels stay unique across splits
      label_specific = label_specific + last
      labels.append( label_specific )
      last = np.max(label_specific) + 1
      print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
    images, labels = np.concatenate(images), np.concatenate(labels)

    self.images = images
    self.labels = labels
    self.n_classes = int( np.max(labels) + 1 )
    self.dict_index_label = {}
    for cls in range(self.n_classes):
      idxs = np.where(labels==cls)[0]
      self.dict_index_label[cls] = idxs
    self.length = len(labels)
    print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))

  def __repr__(self):
    return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))

  def __len__(self):
    return self.length

  def __getitem__(self, index):
    assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
    image = self.images[index].copy()
    label = int(self.labels[index])
    # stored arrays are BGR (OpenCV order); flip channels to build an RGB PIL image
    image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
    if self.transform is not None:
      image = self.transform( image )
    return image, label


def decompress(path, output):
  # read the pickled list of png-encoded images and save them as one npz array
  with open(output, 'rb') as f:
    array = pkl.load(f, encoding='bytes')
  images = np.zeros([len(array), 84, 84, 3], dtype=np.uint8)
  for ii, item in enumerate(array):
    im = cv2.imdecode(item, 1)
    images[ii] = im
  np.savez(path, images=images)
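For reference, a hedged preprocessing sketch showing how the *_images_png.pkl archives could be converted up front with the decompress helper above, instead of lazily inside __init__; the root directory follows the TORCH_HOME convention used in the test script further down, and the split names are assumptions.

# Hypothetical one-off conversion of the png pickles into the npz files that
# TieredImageNet expects; decompress(npz_target, png_source) as defined above.
import os

root = os.environ['TORCH_HOME'] + '/tiered-imagenet'   # assumed data location
for split in ('train', 'val', 'test'):
  npz = '{:}/{:}_images.npz'.format(root, split)
  png = '{:}/{:}_images_png.pkl'.format(root, split)
  if not os.path.exists(npz) and os.path.exists(png):
    decompress(npz, png)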
@@ -1,7 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .MetaBatchSampler import MetaBatchSampler
from .TieredImageNet import TieredImageNet
from .LanguageDataset import Corpus
from .get_dataset_with_transform import get_datasets
@@ -1,77 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os, sys, torch
import os.path as osp
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms

from utils import Cutout
from .TieredImageNet import TieredImageNet


Dataset2Class = {'cifar10' : 10,
                 'cifar100': 100,
                 'tiered'  : -1,
                 'imagenet-1k' : 1000,
                 'imagenet-100': 100}


def get_datasets(name, root, cutout):

  # Mean + Std
  if name == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif name == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  elif name == 'tiered':
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
  else: raise TypeError("Unknown dataset : {:}".format(name))

  # Data Augmentation
  if name == 'cifar10' or name == 'cifar100':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    if cutout > 0 : lists += [Cutout(cutout)]
    train_transform = transforms.Compose(lists)
    test_transform  = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
  elif name == 'tiered':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
    if cutout > 0 : lists += [Cutout(cutout)]
    train_transform = transforms.Compose(lists)
    test_transform  = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)])
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
      transforms.RandomResizedCrop(224),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.2),
      transforms.ToTensor(),
      normalize,
    ])
    test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
  else: raise TypeError("Unknown dataset : {:}".format(name))

  if name == 'cifar10':
    train_data = dset.CIFAR10 (root, train=True , transform=train_transform, download=True)
    test_data  = dset.CIFAR10 (root, train=False, transform=test_transform , download=True)
  elif name == 'cifar100':
    train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
    test_data  = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
  elif name == 'imagenet-1k' or name == 'imagenet-100':
    train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
    test_data  = dset.ImageFolder(osp.join(root, 'val'),   test_transform)
  else: raise TypeError("Unknown dataset : {:}".format(name))

  class_num = Dataset2Class[name]
  return train_data, test_data, class_num
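For reference, a minimal usage sketch of the removed get_datasets helper; the root path, cutout length, and loader settings are assumptions.

# Hypothetical usage: CIFAR-10 with cutout length 16, wrapped in data loaders.
import torch

train_data, test_data, class_num = get_datasets('cifar10', './data/cifar10', cutout=16)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True , num_workers=4)
test_loader  = torch.utils.data.DataLoader(test_data , batch_size=256, shuffle=False, num_workers=4)
print('classes = {:}, train batches = {:}'.format(class_num, len(train_loader)))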
@@ -1,10 +0,0 @@
import os, sys, torch

from LanguageDataset import SentCorpus, BatchSentLoader

if __name__ == '__main__':
  path = '../../data/data/penn'
  corpus = SentCorpus( path )
  loader = BatchSentLoader(corpus.test, 10)
  for i, d in enumerate(loader):
    print('{:} :: {:}'.format(i, d.size()))
@@ -1,33 +0,0 @@
import os, sys, torch
import torchvision.transforms as transforms

from TieredImageNet import TieredImageNet
from MetaBatchSampler import MetaBatchSampler

root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
print ('root : {:}'.format(root_dir))
means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(84, padding=8), transforms.ToTensor(), transforms.Normalize(means, stds)]
transform = transforms.Compose(lists)

dataset = TieredImageNet(root_dir, 'val-test', transform)
image, label = dataset[111]
print ('image shape = {:}, label = {:}'.format(image.size(), label))
print ('image : min = {:}, max = {:}    ||| label : {:}'.format(image.min(), image.max(), label))


sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)

dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)

print ('the length of dataset : {:}'.format( len(dataset) ))
print ('the length of loader  : {:}'.format( len(dataloader) ))

for images, labels in dataloader:
  print ('images : {:}'.format( images.size() ))
  print ('labels : {:}'.format( labels.size() ))
  for i in range(3):
    print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, images[:,i].min(), images[:,i].max(), images[:,i].mean(), images[:,i].std()))

print('-----')