update GDAS (TO-FINISH)
This commit is contained in:
		| @@ -48,7 +48,7 @@ CUDA_VISIBLE_DEVICES=0,1 bash ./scripts-search/search-cifar.sh cifar10 ResNet56 | ||||
| args: `cifar10` indicates the dataset name, `ResNet56` indicates the base model name, `CIFARX` indicates the searching hyper-parameters, `0.47/0.57` indicates the expected FLOP ratio, `-1` indicates the random seed. | ||||
|  | ||||
|  | ||||
| ## One-Shot Neural Architecture Search via Self-Evaluated Template Network | ||||
| ## [One-Shot Neural Architecture Search via Self-Evaluated Template Network](https://arxiv.org/abs/1910.05733) | ||||
|  | ||||
| <img align="right" src="https://d-x-y.github.com/resources/paper-icon/ICCV-2019-SETN.png" width="450"> | ||||
|  | ||||
| @@ -67,7 +67,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k SETN | ||||
| The search code is coming soon! | ||||
|  | ||||
|  | ||||
| ## [Searching for A Robust Neural Architecture in Four GPU Hours](http://openaccess.thecvf.com/content_CVPR_2019/papers/Dong_Searching_for_a_Robust_Neural_Architecture_in_Four_GPU_Hours_CVPR_2019_paper.pdf) | ||||
| ## [Searching for A Robust Neural Architecture in Four GPU Hours](https://arxiv.org/abs/1910.04465) | ||||
|  | ||||
|  | ||||
| <img align="right" src="https://d-x-y.github.com/resources/paper-icon/CVPR-2019-GDAS.png" width="300"> | ||||
|   | ||||
							
								
								
									
										0
									
								
								lib/models/cell_searchs/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/models/cell_searchs/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										196
									
								
								lib/models/cell_searchs/cells.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										196
									
								
								lib/models/cell_searchs/cells.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,196 @@ | ||||
| import math, torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
| from copy import deepcopy | ||||
| from .operations import OPS, ReLUConvBN | ||||
|  | ||||
|  | ||||
class SearchCell(nn.Module):
  """Densely connected DAG cell in which every edge holds all candidate ops.

  Node 0 is the cell input. Node ``i`` (``1 <= i < max_nodes``) is the sum of
  the outputs of the edges ``'i<-j'`` for every ``j < i``. Each edge owns one
  module per entry of ``op_names`` (in the same order); the ``forward_*``
  methods differ only in how those candidates are combined. The output of
  the cell is the feature of the last node.
  """

  def __init__(self, C_in, C_out, stride, max_nodes, op_names):
    """Build every candidate op on every edge.

    Args:
      C_in: number of channels of the cell input (node 0).
      C_out: number of channels produced by every edge.
      stride: stride applied by the edges that read the cell input.
      max_nodes: number of nodes in the cell, including the input node.
      op_names: keys of ``OPS`` naming the candidate operations.
    """
    super(SearchCell, self).__init__()

    self.op_names  = deepcopy(op_names)
    self.edges     = nn.ModuleDict()
    self.max_nodes = max_nodes
    self.in_dim    = C_in
    self.out_dim   = C_out
    for i in range(1, max_nodes):
      for j in range(i):
        node_str = '{:}<-{:}'.format(i, j)
        if j == 0:
          xlists = [OPS[op_name](C_in , C_out, stride) for op_name in op_names]
        else:
          # Intermediate nodes are sums of edge outputs, so they already carry
          # C_out channels (and have already been strided by their j == 0 edge).
          xlists = [OPS[op_name](C_out, C_out,      1) for op_name in op_names]
        self.edges[ node_str ] = nn.ModuleList( xlists )
    self.edge_keys  = sorted(list(self.edges.keys()))
    self.edge2index = {key:i for i, key in enumerate(self.edge_keys)}
    self.num_edges  = len(self.edges)

  def extra_repr(self):
    string = 'info :: {max_nodes} nodes, inC={in_dim}, outC={out_dim}'.format(**self.__dict__)
    return string

  def _propagate(self, inputs, edge_fn):
    """Run the DAG, using ``edge_fn(node_str, feature)`` to evaluate one edge."""
    nodes = [inputs]
    for i in range(1, self.max_nodes):
      inter_nodes = [edge_fn('{:}<-{:}'.format(i, j), nodes[j]) for j in range(i)]
      nodes.append( sum(inter_nodes) )
    return nodes[-1]

  def forward(self, inputs, weightss):
    """Weighted sum of all candidates; ``weightss[e][k]`` scales op ``k`` of edge ``e``."""
    def run_edge(node_str, feature):
      weights = weightss[ self.edge2index[node_str] ]
      return sum( layer(feature) * w for layer, w in zip(self.edges[node_str], weights) )
    return self._propagate(inputs, run_edge)

  # GDAS
  def forward_gdas(self, inputs, weightss, indexess):
    """Evaluate only the op selected on each edge, scaled by its weight.

    Args:
      inputs: feature of node 0.
      weightss: per-edge weights, indexed as ``weightss[edge][op]``.
      indexess: per-edge selected op; ``indexess[edge].item()`` must be the op index.
    """
    def run_edge(node_str, feature):
      edge_id = self.edge2index[node_str]
      chosen  = indexess[edge_id].item()
      return self.edges[node_str][chosen](feature) * weightss[edge_id][chosen]
    return self._propagate(inputs, run_edge)

  # Former name of `forward_gdas`, kept so that existing callers keep working.
  forward_acc = forward_gdas

  # joint
  def forward_joint(self, inputs, weightss):
    """Like ``forward`` but each edge output is divided by its number of weights."""
    def run_edge(node_str, feature):
      weights = weightss[ self.edge2index[node_str] ]
      return sum( layer(feature) * w for layer, w in zip(self.edges[node_str], weights) ) / weights.numel()
    return self._propagate(inputs, run_edge)

  # uniform random sampling per iteration
  def forward_urs(self, inputs):
    """Pick one op per edge uniformly at random (via the ``random`` module).

    For each node the draw is repeated until at least one of its incoming
    edges selected an op that is not flagged with ``is_zero``.
    """
    import random  # local import: the module header does not import it

    nodes = [inputs]
    for i in range(1, self.max_nodes):
      while True: # to avoid select zero for all ops
        sops = [random.choice(self.edges['{:}<-{:}'.format(i, j)]) for j in range(i)]
        if any(not getattr(select_op, 'is_zero', False) for select_op in sops): break
      inter_nodes = [select_op(nodes[j]) for j, select_op in enumerate(sops)]
      nodes.append( sum(inter_nodes) )
    return nodes[-1]

  # select the argmax
  def forward_select(self, inputs, weightss):
    """Evaluate, on each edge, only the op with the largest weight (unscaled)."""
    def run_edge(node_str, feature):
      weights = weightss[ self.edge2index[node_str] ]
      return self.edges[node_str][ weights.argmax().item() ]( feature )
    return self._propagate(inputs, run_edge)

  # follow an explicit structure
  def forward_dynamic(self, inputs, structure):
    """Evaluate the ops listed in ``structure``.

    ``structure.nodes[i-1]`` must iterate over ``(op_name, j)`` pairs that
    describe the incoming edges of node ``i``.
    """
    nodes = [inputs]
    for i in range(1, self.max_nodes):
      cur_op_node = structure.nodes[i-1]
      inter_nodes = []
      for op_name, j in cur_op_node:
        node_str = '{:}<-{:}'.format(i, j)
        op_index = self.op_names.index( op_name )
        inter_nodes.append( self.edges[node_str][op_index]( nodes[j] ) )
      nodes.append( sum(inter_nodes) )
    return nodes[-1]
|  | ||||
|  | ||||
class InferCell(nn.Module):
  """Fixed cell built from a genotype: one op per listed edge, no search weights.

  ``genotype[i-1]`` must iterate over ``(op_name, input_node)`` pairs giving
  the incoming edges of node ``i``; ``len(genotype)`` is stored as the node
  count of the cell.
  """

  def __init__(self, genotype, C_in, C_out, stride):
    super(InferCell, self).__init__()

    self.layers   = nn.ModuleList()
    self.node_IN  = []   # per node: index of the source node of each edge
    self.node_IX  = []   # per node: position in `self.layers` of each edge's op
    self.genotype = deepcopy(genotype)
    for node_id in range(1, len(genotype)):
      layer_ids, input_ids = [], []
      for op_name, op_in in genotype[node_id - 1]:
        # Only edges reading the cell input see C_in channels and the stride.
        reads_input = (op_in == 0)
        in_planes   = C_in   if reads_input else C_out
        op_stride   = stride if reads_input else 1
        layer_ids.append( len(self.layers) )
        input_ids.append( op_in )
        self.layers.append( OPS[op_name](in_planes, C_out, op_stride) )
      self.node_IX.append( layer_ids )
      self.node_IN.append( input_ids )
    self.nodes   = len(genotype)
    self.in_dim  = C_in
    self.out_dim = C_out

  def extra_repr(self):
    header = 'info :: nodes={nodes}, inC={in_dim}, outC={out_dim}'.format(**self.__dict__)
    per_node = []
    for position, (layer_ids, input_ids) in enumerate(zip(self.node_IX, self.node_IN)):
      pairs = ','.join('I{:}-L{:}'.format(src, lid) for lid, src in zip(layer_ids, input_ids))
      per_node.append( '{:}<-({:})'.format(position + 1, pairs) )
    wiring = ', [{:}]'.format( ' | '.join(per_node) )
    return header + wiring + ', {:}'.format(self.genotype.tostr())

  def forward(self, inputs):
    """Return the feature of the last node; each node sums its incoming edges."""
    features = [inputs]
    for layer_ids, input_ids in zip(self.node_IX, self.node_IN):
      edge_outputs = ( self.layers[lid](features[src]) for lid, src in zip(layer_ids, input_ids) )
      features.append( sum(edge_outputs) )
    return features[-1]
|  | ||||
|  | ||||
|  | ||||
class ResNetBasicblock(nn.Module):
  """Residual block: two 3x3 ReLU-Conv-BN layers plus a shortcut branch.

  The shortcut is average pooling followed by a 1x1 convolution when
  ``stride == 2``, a 1x1 ReLU-Conv-BN when only the channel count changes,
  and the unmodified input otherwise.
  """

  def __init__(self, inplanes, planes, stride):
    super(ResNetBasicblock, self).__init__()
    assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
    self.conv_a = ReLUConvBN(inplanes, planes, 3, stride, 1, 1)
    self.conv_b = ReLUConvBN(  planes, planes, 3,      1, 1, 1)
    shortcut = None
    if stride == 2:
      pooling    = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
      projection = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False)
      shortcut   = nn.Sequential(pooling, projection)
    elif inplanes != planes:
      shortcut   = ReLUConvBN(inplanes, planes, 1, 1, 0, 1)
    self.downsample = shortcut
    self.in_dim   = inplanes
    self.out_dim  = planes
    self.stride   = stride
    self.num_conv = 2

  def extra_repr(self):
    template = '{name}(inC={in_dim}, outC={out_dim}, stride={stride})'
    return template.format(name=self.__class__.__name__, **self.__dict__)

  def forward(self, inputs):
    """Return ``shortcut(inputs) + conv_b(conv_a(inputs))``."""
    main_branch = self.conv_b( self.conv_a(inputs) )
    if self.downsample is None:
      return inputs + main_branch
    return self.downsample(inputs) + main_branch
							
								
								
									
										113
									
								
								lib/models/cell_searchs/operations.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								lib/models/cell_searchs/operations.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,113 @@ | ||||
| import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
__all__ = ['OPS', 'ReLUConvBN', 'SearchSpaceNames']


def _pooling_factory(mode):
  """Return a ``(C_in, C_out, stride)`` factory for a POOLING op of ``mode``."""
  return lambda C_in, C_out, stride: POOLING(C_in, C_out, stride, mode)


def _conv_factory(ksize):
  """Return a ``(C_in, C_out, stride)`` factory for a ksize x ksize ReLU-Conv-BN."""
  pad = ksize // 2  # 7 -> 3, 3 -> 1, 1 -> 0
  return lambda C_in, C_out, stride: ReLUConvBN(C_in, C_out, (ksize, ksize), (stride, stride), (pad, pad), (1, 1))


def _skip_connect(C_in, C_out, stride):
  """Identity when nothing changes, otherwise a FactorizedReduce."""
  if stride == 1 and C_in == C_out:
    return Identity()
  return FactorizedReduce(C_in, C_out, stride)


# Maps an operation name to a factory called as factory(C_in, C_out, stride).
OPS = {
  'none'         : lambda C_in, C_out, stride: Zero(C_in, C_out, stride),
  'avg_pool_3x3' : _pooling_factory('avg'),
  'max_pool_3x3' : _pooling_factory('max'),
  'nor_conv_7x7' : _conv_factory(7),
  'nor_conv_3x3' : _conv_factory(3),
  'nor_conv_1x1' : _conv_factory(1),
  'skip_connect' : _skip_connect,
}

CONNECT_NAS_BENCHMARK  = ['none', 'skip_connect', 'nor_conv_3x3']

# Maps a search-space name to its list of candidate operation names.
SearchSpaceNames = {'connect-nas' : CONNECT_NAS_BENCHMARK}
|  | ||||
|  | ||||
class POOLING(nn.Module):
  """3x3 average or max pooling, preceded by a 1x1 ReLU-Conv-BN when channels change.

  Args:
    C_in: number of input channels.
    C_out: number of output channels; a 1x1 projection is inserted when it
      differs from ``C_in``.
    stride: stride of the pooling layer.
    mode: ``'avg'`` or ``'max'``.

  Raises:
    ValueError: if ``mode`` is neither ``'avg'`` nor ``'max'``.
  """

  def __init__(self, C_in, C_out, stride, mode):
    super(POOLING, self).__init__()
    if C_in == C_out:
      self.preprocess = None
    else:
      # ReLUConvBN takes (C_in, C_out, kernel_size, stride, padding, dilation);
      # dilation has no default and must be passed explicitly.
      self.preprocess = ReLUConvBN(C_in, C_out, 1, 1, 0, 1)
    if mode == 'avg'  : self.op = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False)
    elif mode == 'max': self.op = nn.MaxPool2d(3, stride=stride, padding=1)
    else              : raise ValueError('Invalid mode={:} in POOLING'.format(mode))

  def forward(self, inputs):
    if self.preprocess is None: x = inputs
    else                      : x = self.preprocess(inputs)
    return self.op(x)
|  | ||||
|  | ||||
class ReLUConvBN(nn.Module):
  """ReLU -> bias-free Conv2d -> BatchNorm2d, stored as the sequential ``self.op``."""

  def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation):
    super(ReLUConvBN, self).__init__()
    activation    = nn.ReLU(inplace=False)
    convolution   = nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
    normalization = nn.BatchNorm2d(C_out)
    # Positional order fixes the sub-module names op.0 / op.1 / op.2.
    self.op = nn.Sequential(activation, convolution, normalization)

  def forward(self, x):
    out = self.op(x)
    return out
|  | ||||
|  | ||||
class Identity(nn.Module):
  """Pass-through operation: ``forward`` returns its argument unchanged."""

  def __init__(self):
    nn.Module.__init__(self)

  def forward(self, x):
    passthrough = x
    return passthrough
|  | ||||
|  | ||||
class Zero(nn.Module):
  """The 'none' operation: outputs zeros with ``C_out`` channels.

  Dimensions 2 and 3 of the input are sub-sampled with step ``stride`` so the
  output has the same spatial size whether or not the channel count changes.
  The instance carries ``is_zero = True`` so callers can recognise it.
  """

  def __init__(self, C_in, C_out, stride):
    super(Zero, self).__init__()
    self.C_in   = C_in
    self.C_out  = C_out
    self.stride = stride
    self.is_zero = True

  def forward(self, x):
    if self.stride != 1:
      # Strided slicing is applied on both channel branches; previously the
      # C_in != C_out branch ignored the stride.
      x = x[:,:,::self.stride,::self.stride]
    if self.C_in == self.C_out:
      return x.mul(0.)
    shape = list(x.shape)
    shape[1] = self.C_out
    return x.new_zeros(shape, dtype=x.dtype, device=x.device)

  def extra_repr(self):
    return 'C_in={C_in}, C_out={C_out}, stride={stride}'.format(**self.__dict__)
|  | ||||
|  | ||||
class FactorizedReduce(nn.Module):
  """ReLU -> channel projection (optionally halving resolution) -> BatchNorm2d.

  With ``stride == 2`` two 1x1 stride-2 convolutions are applied, the second
  on the input shifted by one pixel, and their outputs are concatenated
  along the channel dimension. With ``stride == 1`` a single 1x1 convolution
  only changes the channel count; this is the case the ``skip_connect``
  factory reaches when ``C_in != C_out`` at stride 1.

  Raises:
    ValueError: if ``stride`` is neither 1 nor 2.
  """

  def __init__(self, C_in, C_out, stride):
    super(FactorizedReduce, self).__init__()
    self.stride = stride
    self.C_in   = C_in
    self.C_out  = C_out
    self.relu   = nn.ReLU(inplace=False)
    if stride == 2:
      # The two branches share C_out; the second gets the extra channel when C_out is odd.
      C_outs = [C_out // 2, C_out - C_out // 2]
      self.convs = nn.ModuleList()
      for i in range(2):
        self.convs.append( nn.Conv2d(C_in, C_outs[i], 1, stride=stride, padding=0, bias=False) )
      self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
    elif stride == 1:
      self.conv = nn.Conv2d(C_in, C_out, 1, stride=stride, padding=0, bias=False)
    else:
      raise ValueError('Invalid stride : {:}'.format(stride))

    self.bn = nn.BatchNorm2d(C_out)

  def forward(self, x):
    x = self.relu(x)
    if self.stride == 2:
      # Pad bottom/right by one, then drop the first row/column: a one-pixel shift.
      y = self.pad(x)
      out = torch.cat([self.convs[0](x), self.convs[1](y[:,:,1:,1:])], dim=1)
    else:
      out = self.conv(x)
    out = self.bn(out)
    return out

  def extra_repr(self):
    return 'C_in={C_in}, C_out={C_out}, stride={stride}'.format(**self.__dict__)
							
								
								
									
										117
									
								
								lib/models/cell_searchs/search_model_gdas.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								lib/models/cell_searchs/search_model_gdas.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,117 @@ | ||||
| ################################################## | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 # | ||||
| ########################################################################### | ||||
| # Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019 # | ||||
| ########################################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| from copy import deepcopy | ||||
| from .infer_cells  import ResNetBasicblock | ||||
| from .search_cells import SearchCell | ||||
| from .genotypes    import Structure | ||||
|  | ||||
|  | ||||
class TinyNetworkGDAS(nn.Module):
  """Search network for GDAS: a stem, stacked cells and a linear classifier.

  The cell stack consists of three stages of ``N`` ``SearchCell``s with
  ``C``, ``2C`` and ``4C`` channels, separated by stride-2
  ``ResNetBasicblock``s. All search cells share one architecture parameter
  of shape ``(num_edge, len(search_space))``; at every search cell a fresh
  hard gumbel-softmax sample of it selects one operation per edge.
  """

  def __init__(self, C, N, max_nodes, num_classes, search_space):
    """
    Args:
      C: number of channels after the stem.
      N: number of search cells per stage.
      max_nodes: number of nodes in every search cell.
      num_classes: output size of the classifier.
      search_space: list of candidate operation names.
    """
    super(TinyNetworkGDAS, self).__init__()
    self._C        = C
    self._layerN   = N
    self.max_nodes = max_nodes
    self.stem = nn.Sequential(
                    nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
                    nn.BatchNorm2d(C))

    layer_channels   = [C    ] * N + [C*2 ] + [C*2  ] * N + [C*4 ] + [C*4  ] * N
    layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N

    C_prev, num_edge, edge2index = C, None, None
    self.cells = nn.ModuleList()
    for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
      if reduction:
        cell = ResNetBasicblock(C_prev, C_curr, 2)
      else:
        cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space)
        # Every search cell must expose the same edges, since they share one parameter.
        if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
        else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
      self.cells.append( cell )
      C_prev = cell.out_dim
    self.op_names   = deepcopy( search_space )
    self._Layer     = len(self.cells)
    self.edge2index = edge2index
    self.lastact    = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.arch_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
    self.tau        = 10
    self.nan_count  = 0

  def get_weights(self):
    """Return the parameters of every sub-module, i.e. everything except ``arch_parameters``."""
    xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
    xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
    xlist+= list( self.classifier.parameters() )
    return xlist

  def set_tau(self, tau, _nan_count=0):
    """Set the gumbel-softmax temperature and reset the rejected-sample counter."""
    self.tau = tau
    self.nan_count = _nan_count

  def get_tau(self):
    return self.tau

  def get_alphas(self):
    return [self.arch_parameters]

  def get_message(self):
    string = self.extra_repr()
    for i, cell in enumerate(self.cells):
      string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
    return string

  def extra_repr(self):
    return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))

  def genotype(self):
    """Return a ``Structure`` holding, for each edge, the op with the largest parameter."""
    genotypes = []
    for i in range(1, self.max_nodes):
      xlist = []
      for j in range(i):
        node_str = '{:}<-{:}'.format(i, j)
        with torch.no_grad():
          weights = self.arch_parameters[ self.edge2index[node_str] ]
          op_name = self.op_names[ weights.argmax().item() ]
        xlist.append((op_name, j))
      genotypes.append( tuple(xlist) )
    return Structure( genotypes )

  def _has_input_output_path(self, chosen):
    """Tell whether node 0 reaches the last node through edges whose op is not 'none'.

    ``chosen[e]`` is the index (into ``op_names``) of the op selected on edge
    ``e``. For ``max_nodes == 4`` this rejects exactly the selections that the
    former hard-coded tests on edges 0..5 rejected (the four edge cuts between
    the input and the output node); it also works for any other cell size and
    for search spaces without a 'none' op.
    """
    if 'none' not in self.op_names:
      return True
    none_index = self.op_names.index('none')
    reachable  = {0}
    for i in range(1, self.max_nodes):
      for j in range(i):
        if j in reachable and chosen[ self.edge2index['{:}<-{:}'.format(i, j)] ] != none_index:
          reachable.add(i)
          break
    return (self.max_nodes - 1) in reachable

  def _gumbel_softmax(self, _logits, _tau):
    """Draw a hard (one-hot forward, soft backward) gumbel-softmax sample per edge.

    Returns:
      ``(xres, index)``: ``xres`` has the shape of ``_logits`` and equals the
      one-hot selection in value while carrying the gradient of the soft
      probabilities; ``index`` holds the selected op of each edge, with a
      trailing dimension of size 1.
    """
    while True: # a trick to avoid the gumbels bug
      gumbels    = -torch.empty_like(_logits).exponential_().log()
      new_logits = (_logits.log_softmax(dim=1) + gumbels) / _tau
      probs      = nn.functional.softmax(new_logits, dim=1)
      index      = probs.max(-1, keepdim=True)[1]
      # Re-draw when the selected 'none' ops disconnect the input from the output.
      if not self._has_input_output_path( index.view(-1).tolist() ): continue
      one_h      = torch.zeros_like(_logits).scatter_(-1, index, 1.0)
      xres       = one_h - probs.detach() + probs
      if (not torch.isinf(gumbels).any()) and (not torch.isinf(probs).any()) and (not torch.isnan(probs).any()): break
      self.nan_count += 1
    return xres, index

  def forward(self, inputs):
    """Return ``(pooled_features, logits)`` for a batch of inputs."""
    feature = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if isinstance(cell, SearchCell):
        # A new architecture sample is drawn for every search cell.
        alphas, IDX  = self._gumbel_softmax(self.arch_parameters, self.tau)
        feature = cell.forward_gdas(feature, alphas, IDX.cpu())
      else:
        feature = cell(feature)

    out = self.lastact(feature)
    out = self.global_pooling( out )
    out = out.view(out.size(0), -1)
    logits = self.classifier(out)

    return out, logits
| @@ -9,8 +9,7 @@ def select2withP(logits, tau, just_prob=False, num=2, eps=1e-7): | ||||
|   else       : | ||||
|     while True: # a trick to avoid the gumbels bug | ||||
|       gumbels = -torch.empty_like(logits).exponential_().log() | ||||
|       new_logits = (logits + gumbels) / tau | ||||
|       #new_logits = (logits.log_softmax(dim=1) + gumbels) / tau | ||||
|       new_logits = (logits.log_softmax(dim=1) + gumbels) / tau | ||||
|       probs = nn.functional.softmax(new_logits, dim=1) | ||||
|       if (not torch.isinf(gumbels).any()) and (not torch.isinf(probs).any()) and (not torch.isnan(probs).any()): break | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user