Clean unnecessary files
@@ -1,9 +0,0 @@
# utils
from .utils import batchify, get_batch, repackage_hidden
# models
from .model_search import RNNModelSearch
from .model_search import DARTSCellSearch
from .basemodel import DARTSCell, RNNModel
# architecture
from .genotypes import DARTS_V1, DARTS_V2
from .genotypes import GDAS
@@ -1,181 +0,0 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from .genotypes import STEPS
from .utils import mask2d, LockedDropout, embedded_dropout


INITRANGE = 0.04

def none_func(x):
  return x * 0


class DARTSCell(nn.Module):

  def __init__(self, ninp, nhid, dropouth, dropoutx, genotype):
    super(DARTSCell, self).__init__()
    self.nhid = nhid
    self.dropouth = dropouth
    self.dropoutx = dropoutx
    self.genotype = genotype

    # genotype is None when doing arch search
    steps = len(self.genotype.recurrent) if self.genotype is not None else STEPS
    self._W0 = nn.Parameter(torch.Tensor(ninp+nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE))
    self._Ws = nn.ParameterList([
        nn.Parameter(torch.Tensor(nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE)) for i in range(steps)
    ])

  def forward(self, inputs, hidden, arch_probs):
    T, B = inputs.size(0), inputs.size(1)

    if self.training:
      x_mask = mask2d(B, inputs.size(2), keep_prob=1.-self.dropoutx)
      h_mask = mask2d(B, hidden.size(2), keep_prob=1.-self.dropouth)
    else:
      x_mask = h_mask = None

    hidden = hidden[0]
    hiddens = []
    for t in range(T):
      hidden = self.cell(inputs[t], hidden, x_mask, h_mask, arch_probs)
      hiddens.append(hidden)
    hiddens = torch.stack(hiddens)
    return hiddens, hiddens[-1].unsqueeze(0)

  def _compute_init_state(self, x, h_prev, x_mask, h_mask):
    if self.training:
      xh_prev = torch.cat([x * x_mask, h_prev * h_mask], dim=-1)
    else:
      xh_prev = torch.cat([x, h_prev], dim=-1)
    c0, h0 = torch.split(xh_prev.mm(self._W0), self.nhid, dim=-1)
    c0 = c0.sigmoid()
    h0 = h0.tanh()
    s0 = h_prev + c0 * (h0-h_prev)
    return s0

  def _get_activation(self, name):
    if name == 'tanh':
      f = torch.tanh
    elif name == 'relu':
      f = torch.relu
    elif name == 'sigmoid':
      f = torch.sigmoid
    elif name == 'identity':
      f = lambda x: x
    elif name == 'none':
      f = none_func
    else:
      raise NotImplementedError
    return f

  def cell(self, x, h_prev, x_mask, h_mask, _):
    s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)

    states = [s0]
    for i, (name, pred) in enumerate(self.genotype.recurrent):
      s_prev = states[pred]
      if self.training:
        ch = (s_prev * h_mask).mm(self._Ws[i])
      else:
        ch = s_prev.mm(self._Ws[i])
      c, h = torch.split(ch, self.nhid, dim=-1)
      c = c.sigmoid()
      fn = self._get_activation(name)
      h = fn(h)
      s = s_prev + c * (h-s_prev)
      states += [s]
    output = torch.mean(torch.stack([states[i] for i in self.genotype.concat], -1), -1)
    return output


class RNNModel(nn.Module):
  """Container module with an encoder, a recurrent module, and a decoder."""
  def __init__(self, ntoken, ninp, nhid, nhidlast,
               dropout=0.5, dropouth=0.5, dropoutx=0.5, dropouti=0.5, dropoute=0.1,
               cell_cls=None, genotype=None):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)

    assert ninp == nhid == nhidlast
    if cell_cls == DARTSCell:
      assert genotype is not None
      rnns = [cell_cls(ninp, nhid, dropouth, dropoutx, genotype)]
    else:
      assert genotype is None
      rnns = [cell_cls(ninp, nhid, dropouth, dropoutx)]

    self.rnns    = torch.nn.ModuleList(rnns)
    self.decoder = nn.Linear(ninp, ntoken)
    self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.arch_weights = None

    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropoute = dropoute
    self.ntoken = ntoken
    self.cell_cls = cell_cls
    # acceleration
    self.tau = None
    self.use_gumbel = False

  def set_gumbel(self, use_gumbel, set_check):
    self.use_gumbel = use_gumbel
    for i, rnn in enumerate(self.rnns):
      rnn.set_check(set_check)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def init_weights(self):
    self.encoder.weight.data.uniform_(-INITRANGE, INITRANGE)
    self.decoder.bias.data.fill_(0)
    self.decoder.weight.data.uniform_(-INITRANGE, INITRANGE)

  def forward(self, input, hidden, return_h=False):
    batch_size = input.size(1)

    emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
    emb = self.lockdrop(emb, self.dropouti)

    raw_output = emb
    new_hidden = []
    raw_outputs = []
    outputs = []
    if self.arch_weights is None:
      arch_probs = None
    else:
      if self.use_gumbel: arch_probs = F.gumbel_softmax(self.arch_weights, self.tau, False)
      else              : arch_probs = F.softmax(self.arch_weights, dim=-1)

    for l, rnn in enumerate(self.rnns):
      current_input = raw_output
      raw_output, new_h = rnn(raw_output, hidden[l], arch_probs)
      new_hidden.append(new_h)
      raw_outputs.append(raw_output)
    hidden = new_hidden

    output = self.lockdrop(raw_output, self.dropout)
    outputs.append(output)

    logit = self.decoder(output.view(-1, self.ninp))
    log_prob = nn.functional.log_softmax(logit, dim=-1)
    model_output = log_prob
    model_output = model_output.view(-1, batch_size, self.ntoken)

    if return_h: return model_output, hidden, raw_outputs, outputs
    else       : return model_output, hidden

  def init_hidden(self, bsz):
    weight = next(self.parameters()).clone()
    return [weight.new(1, bsz, self.nhid).zero_()]
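For orientation, the removed cell/model module above can be exercised with a minimal sketch like the one below. The package name `nas_rnn`, the vocabulary size, and the tensor shapes are illustrative assumptions, not part of the deleted files; the class and genotype names are the ones re-exported by the deleted `__init__`.

import torch
from nas_rnn import DARTSCell, RNNModel, DARTS_V1   # hypothetical package name

ntoken, ninp = 10000, 300                     # assumed vocabulary size and embedding width
model = RNNModel(ntoken, ninp, ninp, ninp,    # ninp == nhid == nhidlast is asserted above
                 cell_cls=DARTSCell, genotype=DARTS_V1)
model.eval()                                  # training mode builds CUDA dropout masks via mask2d
hidden = model.init_hidden(4)                 # a single (1, batch, nhid) zero state
tokens = torch.randint(0, ntoken, (35, 4))    # (seq_len, batch) token indices
log_prob, hidden = model(tokens, hidden)
print(log_prob.shape)                         # (35, 4, ntoken) log-probabilities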
@@ -1,55 +0,0 @@
from collections import namedtuple

Genotype = namedtuple('Genotype', 'recurrent concat')

PRIMITIVES = [
    'none',
    'tanh',
    'relu',
    'sigmoid',
    'identity'
]
STEPS = 8
CONCAT = 8

ENAS = Genotype(
    recurrent = [
        ('tanh', 0),
        ('tanh', 1),
        ('relu', 1),
        ('tanh', 3),
        ('tanh', 3),
        ('relu', 3),
        ('relu', 4),
        ('relu', 7),
        ('relu', 8),
        ('relu', 8),
        ('relu', 8),
    ],
    concat = [2, 5, 6, 9, 10, 11]
)

DARTS_V1 = Genotype(
  recurrent = [
    ('relu', 0), ('relu', 1), ('tanh', 2), ('relu', 3),
    ('relu', 4), ('identity', 1), ('relu', 5), ('relu', 1)
  ],
  concat=range(1, 9)
)

DARTS_V2 = Genotype(
  recurrent = [
    ('sigmoid', 0), ('relu', 1), ('relu', 1),
    ('identity', 1), ('tanh', 2), ('sigmoid', 5),
    ('tanh', 3), ('relu', 5)
  ],
  concat=range(1, 9)
)

GDAS = Genotype(
  recurrent=[('relu', 0), ('relu', 0), ('identity', 1), ('relu', 1),
             ('tanh', 0), ('relu', 2), ('identity', 4), ('identity', 2)],
  concat=range(1, 9)
)
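A hedged illustration of how these records are consumed (under the same assumed `nas_rnn` package name as the earlier sketch): each `recurrent` entry pairs an activation with the index of the predecessor node it reads from, and `concat` lists the nodes averaged into the cell output.

from nas_rnn import GDAS   # hypothetical package name

for step, (act, pred) in enumerate(GDAS.recurrent, start=1):
    print(f"node {step}: {act} applied to a projection of node {pred}")
print("cell output = mean of nodes", list(GDAS.concat))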
@@ -1,104 +0,0 @@
import copy, torch
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple
from .genotypes import PRIMITIVES, STEPS, CONCAT, Genotype
from .basemodel import DARTSCell, RNNModel


class DARTSCellSearch(DARTSCell):

  def __init__(self, ninp, nhid, dropouth, dropoutx):
    super(DARTSCellSearch, self).__init__(ninp, nhid, dropouth, dropoutx, genotype=None)
    self.bn = nn.BatchNorm1d(nhid, affine=False)
    self.check_zero = False

  def set_check(self, check_zero):
    self.check_zero = check_zero

  def cell(self, x, h_prev, x_mask, h_mask, arch_probs):
    # Search cell: instead of one fixed activation per edge, mix every
    # candidate activation (except 'none') using the architecture
    # probabilities passed in as arch_probs.
    s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
    s0 = self.bn(s0)
    if self.check_zero:
      arch_probs_cpu = arch_probs.cpu().tolist()
    #arch_probs = F.softmax(self.weights, dim=-1)

    offset = 0
    states = s0.unsqueeze(0)
    for i in range(STEPS):
      if self.training:
        masked_states = states * h_mask.unsqueeze(0)
      else:
        masked_states = states
      ch = masked_states.view(-1, self.nhid).mm(self._Ws[i]).view(i+1, -1, 2*self.nhid)
      c, h = torch.split(ch, self.nhid, dim=-1)
      c = c.sigmoid()

      s = torch.zeros_like(s0)
      for k, name in enumerate(PRIMITIVES):
        if name == 'none':
          continue
        fn = self._get_activation(name)
        unweighted = states + c * (fn(h) - states)
        if self.check_zero:
          # When check_zero is enabled, skip candidates whose architecture
          # probability is exactly zero so they contribute no computation.
          INDEX, INDDX = [], []
          for jj in range(offset, offset+i+1):
            if arch_probs_cpu[jj][k] > 0:
              INDEX.append(jj)
              INDDX.append(jj-offset)
          if len(INDEX) == 0: continue
          s += torch.sum(arch_probs[INDEX, k].unsqueeze(-1).unsqueeze(-1) * unweighted[INDDX, :, :], dim=0)
        else:
          s += torch.sum(arch_probs[offset:offset+i+1, k].unsqueeze(-1).unsqueeze(-1) * unweighted, dim=0)
      s = self.bn(s)
      states = torch.cat([states, s.unsqueeze(0)], 0)
      offset += i+1
    output = torch.mean(states[-CONCAT:], dim=0)
    return output


class RNNModelSearch(RNNModel):

  def __init__(self, *args):
    super(RNNModelSearch, self).__init__(*args)
    self._args = copy.deepcopy( args )

    k = sum(i for i in range(1, STEPS+1))
    self.arch_weights = nn.Parameter(torch.Tensor(k, len(PRIMITIVES)))
    nn.init.normal_(self.arch_weights, 0, 0.001)

  def base_parameters(self):
    lists  = list(self.lockdrop.parameters())
    lists += list(self.encoder.parameters())
    lists += list(self.rnns.parameters())
    lists += list(self.decoder.parameters())
    return lists

  def arch_parameters(self):
    return [self.arch_weights]

  def genotype(self):

    def _parse(probs):
      gene = []
      start = 0
      for i in range(STEPS):
        end = start + i + 1
        W = probs[start:end].copy()
        #j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[0]
        j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x]))))[0]
        k_best = None
        for k in range(len(W[j])):
          #if k != PRIMITIVES.index('none'):
          #  if k_best is None or W[j][k] > W[j][k_best]:
          #    k_best = k
          if k_best is None or W[j][k] > W[j][k_best]:
            k_best = k
        gene.append((PRIMITIVES[k_best], j))
        start = end
      return gene

    with torch.no_grad():
      gene = _parse(F.softmax(self.arch_weights, dim=-1).cpu().numpy())
    genotype = Genotype(recurrent=gene, concat=list(range(STEPS+1)[-CONCAT:]))
    return genotype
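A hypothetical search-side sketch of the classes above, again under the assumed `nas_rnn` package name; the optimizer choices and learning rates are illustrative, not taken from the removed files. It shows the usual DARTS-style split into weight parameters and architecture parameters, and how the current genotype is read back.

import torch
from nas_rnn import RNNModelSearch, DARTSCellSearch   # hypothetical package name

ntoken, ninp = 10000, 300
model = RNNModelSearch(ntoken, ninp, ninp, ninp,
                       0.5, 0.5, 0.5, 0.5, 0.1,       # dropout / dropouth / dropoutx / dropouti / dropoute
                       DARTSCellSearch, None)         # cell_cls, genotype=None during search
model.set_tau(5.0)                                    # temperature used by gumbel_softmax in forward()
model.set_gumbel(True, False)                         # sample architecture weights, disable the zero check

w_optim = torch.optim.SGD(model.base_parameters(), lr=20.0)    # network weights (illustrative lr)
a_optim = torch.optim.Adam(model.arch_parameters(), lr=3e-3)   # architecture weights (illustrative lr)
print(model.genotype())                               # Genotype decoded from the current arch_weights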
@@ -1,66 +0,0 @@
import torch
import torch.nn as nn
import os, shutil
import numpy as np


def repackage_hidden(h):
  if isinstance(h, torch.Tensor):
    return h.detach()
  else:
    return tuple(repackage_hidden(v) for v in h)


def batchify(data, bsz, use_cuda):
  nbatch = data.size(0) // bsz
  data = data.narrow(0, 0, nbatch * bsz)
  data = data.view(bsz, -1).t().contiguous()
  if use_cuda: return data.cuda()
  else       : return data


def get_batch(source, i, seq_len):
  seq_len = min(seq_len, len(source) - 1 - i)
  data    = source[i:i+seq_len].clone()
  target  = source[i+1:i+1+seq_len].clone()
  return data, target


def embedded_dropout(embed, words, dropout=0.1, scale=None):
  if dropout:
    mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout)
    mask.requires_grad_(True)
    masked_embed_weight = mask * embed.weight
  else:
    masked_embed_weight = embed.weight
  if scale:
    masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight

  padding_idx = embed.padding_idx
  if padding_idx is None:
    padding_idx = -1
  X = torch.nn.functional.embedding(
        words, masked_embed_weight,
        padding_idx, embed.max_norm, embed.norm_type,
        embed.scale_grad_by_freq, embed.sparse)
  return X


class LockedDropout(nn.Module):
  def __init__(self):
    super(LockedDropout, self).__init__()

  def forward(self, x, dropout=0.5):
    if not self.training or not dropout:
      return x
    m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
    mask = m.div_(1 - dropout).detach()
    mask = mask.expand_as(x)
    return mask * x


def mask2d(B, D, keep_prob, cuda=True):
  m = torch.floor(torch.rand(B, D) + keep_prob) / keep_prob
  if cuda: return m.cuda()
  else   : return m
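A small, self-contained sketch of the data helpers above, using a fake token stream (a real run would build the stream from a tokenized corpus first); the `nas_rnn` import path is an assumption.

import torch
from nas_rnn import batchify, get_batch           # hypothetical package name

corpus = torch.randint(0, 1000, (2003,))          # fake 1-D stream of token ids
data = batchify(corpus, bsz=20, use_cuda=False)   # (100, 20): each column is an independent stream
x, y = get_batch(data, i=0, seq_len=35)           # y is x shifted forward by one position
print(data.shape, x.shape, y.shape)               # (100, 20) (35, 20) (35, 20)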