add autodl

AutoDL-Projects/xautodl/xlayers/__init__.py (new normal file, 7 lines)
							| @@ -0,0 +1,7 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # | ||||
| ##################################################### | ||||
| # This file is expected to be self-contained, except | ||||
| # for importing from spaces to include search space. | ||||
| ##################################################### | ||||
| from .super_core import * | ||||
							
								
								
									
AutoDL-Projects/xautodl/xlayers/misc_utils.py (new normal file, 154 lines)
							| @@ -0,0 +1,154 @@ | ||||
| # borrowed from https://github.com/arogozhnikov/einops/blob/master/einops/parsing.py | ||||
| import warnings | ||||
| import keyword | ||||
| from typing import List | ||||
|  | ||||
|  | ||||
| class AnonymousAxis: | ||||
|     """Important thing: all instances of this class are not equal to each other""" | ||||
|  | ||||
|     def __init__(self, value: str): | ||||
|         self.value = int(value) | ||||
|         if self.value <= 1: | ||||
|             if self.value == 1: | ||||
|                 raise ValueError( | ||||
|                     "No need to create anonymous axis of length 1. Report this as an issue" | ||||
|                 ) | ||||
|             else: | ||||
|                 raise ValueError( | ||||
|                     "Anonymous axis should have positive length, not {}".format( | ||||
|                         self.value | ||||
|                     ) | ||||
|                 ) | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return "{}-axis".format(str(self.value)) | ||||
|  | ||||
|  | ||||
| class ParsedExpression: | ||||
|     """ | ||||
|     Non-mutable structure that contains information about one side of an expression (e.g. 'b c (h w)') | ||||
|     and keeps some information that is important downstream. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, expression): | ||||
|         self.identifiers = set() | ||||
|         # that's axes like 2, 3 or 5. Axes with size 1 are exceptional and replaced with empty composition | ||||
|         self.has_non_unitary_anonymous_axes = False | ||||
|         # composition keeps structure of composite axes, see how different corner cases are handled in tests | ||||
|         self.composition = [] | ||||
|         if "." in expression: | ||||
|             raise ValueError("Does not support . in the expression.") | ||||
|  | ||||
|         bracket_group = None | ||||
|  | ||||
|         def add_axis_name(x): | ||||
|             if x is not None: | ||||
|                 if x in self.identifiers: | ||||
|                     raise ValueError( | ||||
|                         'Indexing expression contains duplicate dimension "{}"'.format( | ||||
|                             x | ||||
|                         ) | ||||
|                     ) | ||||
|                 is_number = str.isdecimal(x) | ||||
|                 if is_number and int(x) == 1: | ||||
|                     # handling the case of anonymous axis of length 1 | ||||
|                     if bracket_group is None: | ||||
|                         self.composition.append([]) | ||||
|                     else: | ||||
|                         pass  # no need to think about 1s inside parenthesis | ||||
|                     return | ||||
|                 is_axis_name, reason = self.check_axis_name(x, return_reason=True) | ||||
|                 if not (is_number or is_axis_name): | ||||
|                     raise ValueError( | ||||
|                         "Invalid axis identifier: {}\n{}".format(x, reason) | ||||
|                     ) | ||||
|                 if is_number: | ||||
|                     x = AnonymousAxis(x) | ||||
|                 self.identifiers.add(x) | ||||
|                 if is_number: | ||||
|                     self.has_non_unitary_anonymous_axes = True | ||||
|                 if bracket_group is None: | ||||
|                     self.composition.append([x]) | ||||
|                 else: | ||||
|                     bracket_group.append(x) | ||||
|  | ||||
|         current_identifier = None | ||||
|         for char in expression: | ||||
|             if char in "() ": | ||||
|                 add_axis_name(current_identifier) | ||||
|                 current_identifier = None | ||||
|                 if char == "(": | ||||
|                     if bracket_group is not None: | ||||
|                         raise ValueError( | ||||
|                             "Axis composition is one-level (brackets inside brackets not allowed)" | ||||
|                         ) | ||||
|                     bracket_group = [] | ||||
|                 elif char == ")": | ||||
|                     if bracket_group is None: | ||||
|                         raise ValueError("Brackets are not balanced") | ||||
|                     self.composition.append(bracket_group) | ||||
|                     bracket_group = None | ||||
|             elif str.isalnum(char) or char == "_": | ||||
|                 if current_identifier is None: | ||||
|                     current_identifier = char | ||||
|                 else: | ||||
|                     current_identifier += char | ||||
|             else: | ||||
|                 raise ValueError("Unknown character '{}'".format(char)) | ||||
|  | ||||
|         if bracket_group is not None: | ||||
|             raise ValueError( | ||||
|                 'Imbalanced parentheses in expression: "{}"'.format(expression) | ||||
|             ) | ||||
|         add_axis_name(current_identifier) | ||||
|  | ||||
|     def flat_axes_order(self) -> List: | ||||
|         result = [] | ||||
|         for composed_axis in self.composition: | ||||
|             assert isinstance(composed_axis, list), "does not work with ellipsis" | ||||
|             for axis in composed_axis: | ||||
|                 result.append(axis) | ||||
|         return result | ||||
|  | ||||
|     def has_composed_axes(self) -> bool: | ||||
|         # this will ignore 1 inside brackets | ||||
|         for axes in self.composition: | ||||
|             if isinstance(axes, list) and len(axes) > 1: | ||||
|                 return True | ||||
|         return False | ||||
|  | ||||
|     @staticmethod | ||||
|     def check_axis_name(name: str, return_reason=False): | ||||
|         """ | ||||
|         Valid axis names are Python identifiers that are not keywords | ||||
|         and additionally do not start or end with an underscore. | ||||
|         """ | ||||
|         if not str.isidentifier(name): | ||||
|             result = False, "not a valid python identifier" | ||||
|         elif name[0] == "_" or name[-1] == "_": | ||||
|             result = False, "axis name should not start or end with underscore" | ||||
|         else: | ||||
|             if keyword.iskeyword(name): | ||||
|                 warnings.warn( | ||||
|                     "It is discouraged to use axes names that are keywords: {}".format( | ||||
|                         name | ||||
|                     ), | ||||
|                     RuntimeWarning, | ||||
|                 ) | ||||
|             if name in ["axis"]: | ||||
|                 warnings.warn( | ||||
|                     "It is discouraged to use 'axis' as an axis name " | ||||
|                     "and this will raise an error in the future", | ||||
|                     FutureWarning, | ||||
|                 ) | ||||
|             result = True, None | ||||
|         if return_reason: | ||||
|             return result | ||||
|         else: | ||||
|             return result[0] | ||||
|  | ||||
|     def __repr__(self) -> str: | ||||
|         return "{name}({composition})".format( | ||||
|             name=self.__class__.__name__, composition=self.composition | ||||
|         ) | ||||
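For reference, a minimal usage sketch of ParsedExpression (an illustrative example rather than part of the diff; it assumes the xautodl package is importable):

    from xautodl.xlayers.misc_utils import ParsedExpression

    expr = ParsedExpression("b c (h w)")
    print(expr.composition)          # [['b'], ['c'], ['h', 'w']]
    print(expr.flat_axes_order())    # ['b', 'c', 'h', 'w']
    print(expr.has_composed_axes())  # True
    # Axis-name validation used by the parser:
    print(ParsedExpression.check_axis_name("h"))   # True
    print(ParsedExpression.check_axis_name("_h"))  # False (leading underscore)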
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_activations.py (new normal file, 124 lines)
							| @@ -0,0 +1,124 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| import math | ||||
| from typing import Optional, Callable | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
|  | ||||
|  | ||||
| class SuperReLU(SuperModule): | ||||
|     """Applies a the rectified linear unit function element-wise.""" | ||||
|  | ||||
|     def __init__(self, inplace: bool = False) -> None: | ||||
|         super(SuperReLU, self).__init__() | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.relu(input, inplace=self._inplace) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "inplace=True" if self._inplace else "" | ||||
|  | ||||
|  | ||||
| class SuperGELU(SuperModule): | ||||
|     """Applies a the Gaussian Error Linear Units function element-wise.""" | ||||
|  | ||||
|     def __init__(self) -> None: | ||||
|         super(SuperGELU, self).__init__() | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.gelu(input) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|  | ||||
| class SuperSigmoid(SuperModule): | ||||
|     """Applies a the Sigmoid function element-wise.""" | ||||
|  | ||||
|     def __init__(self) -> None: | ||||
|         super(SuperSigmoid, self).__init__() | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return torch.sigmoid(input) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|  | ||||
| class SuperLeakyReLU(SuperModule): | ||||
|     """https://pytorch.org/docs/stable/_modules/torch/nn/modules/activation.html#LeakyReLU""" | ||||
|  | ||||
|     def __init__(self, negative_slope: float = 1e-2, inplace: bool = False) -> None: | ||||
|         super(SuperLeakyReLU, self).__init__() | ||||
|         self._negative_slope = negative_slope | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.leaky_relu(input, self._negative_slope, self._inplace) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         inplace_str = "inplace=True" if self._inplace else "" | ||||
|         return "negative_slope={}{}".format(self._negative_slope, inplace_str) | ||||
|  | ||||
|  | ||||
| class SuperTanh(SuperModule): | ||||
|     """Applies a the Tanh function element-wise.""" | ||||
|  | ||||
|     def __init__(self) -> None: | ||||
|         super(SuperTanh, self).__init__() | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return torch.tanh(input) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
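All of these wrappers share one pattern: forward_candidate delegates to forward_raw, which is a plain call into torch. A small sketch (illustrative, not from the diff; assumes xautodl is importable):

    import torch
    from xautodl.xlayers.super_activations import SuperReLU, SuperLeakyReLU

    x = torch.randn(2, 4)
    act = SuperReLU()
    # forward_raw is the plain functional call, so it matches torch.relu exactly.
    assert torch.equal(act.forward_raw(x), torch.relu(x))

    leaky = SuperLeakyReLU(negative_slope=0.1)
    assert torch.allclose(leaky.forward_raw(x), torch.nn.functional.leaky_relu(x, 0.1))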
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_attention.py (new normal file, 341 lines)
							| @@ -0,0 +1,341 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import math | ||||
| from typing import Optional, Text | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
| from .super_dropout import SuperDropout, SuperDrop | ||||
| from .super_linear import SuperLinear | ||||
|  | ||||
|  | ||||
| class SuperSelfAttention(SuperModule): | ||||
|     """The super model for attention layer.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         input_dim: IntSpaceType, | ||||
|         proj_dim: Optional[IntSpaceType], | ||||
|         num_heads: IntSpaceType, | ||||
|         qkv_bias: BoolSpaceType = False, | ||||
|         attn_drop: Optional[float] = None, | ||||
|         proj_drop: Optional[float] = None, | ||||
|         use_mask=False, | ||||
|     ): | ||||
|         super(SuperSelfAttention, self).__init__() | ||||
|         self._input_dim = input_dim | ||||
|         self._proj_dim = proj_dim | ||||
|         self._num_heads = num_heads | ||||
|         self._qkv_bias = qkv_bias | ||||
|         self._use_mask = use_mask | ||||
|         self._infinity = 1e9 | ||||
|  | ||||
|         mul_head_dim = ( | ||||
|             spaces.get_max(input_dim) // spaces.get_min(num_heads) | ||||
|         ) * spaces.get_min(num_heads) | ||||
|         assert mul_head_dim == spaces.get_max(input_dim) | ||||
|         self.q_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias) | ||||
|         self.k_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias) | ||||
|         self.v_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias) | ||||
|  | ||||
|         self.attn_drop = SuperDrop(attn_drop or 0.0, [-1, -1, -1, -1], recover=True) | ||||
|         if proj_dim is not None: | ||||
|             self.proj = SuperLinear(input_dim, proj_dim) | ||||
|             self.proj_drop = SuperDropout(proj_drop or 0.0) | ||||
|         else: | ||||
|             self.proj = None | ||||
|  | ||||
|     @property | ||||
|     def num_heads(self): | ||||
|         return spaces.get_max(self._num_heads) | ||||
|  | ||||
|     @property | ||||
|     def input_dim(self): | ||||
|         return spaces.get_max(self._input_dim) | ||||
|  | ||||
|     @property | ||||
|     def proj_dim(self): | ||||
|         return spaces.get_max(self._proj_dim) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         space_q = self.q_fc.abstract_search_space | ||||
|         space_k = self.k_fc.abstract_search_space | ||||
|         space_v = self.v_fc.abstract_search_space | ||||
|         if not spaces.is_determined(self._num_heads): | ||||
|             root_node.append("_num_heads", self._num_heads.abstract(reuse_last=True)) | ||||
|         if not spaces.is_determined(space_q): | ||||
|             root_node.append("q_fc", space_q) | ||||
|         if not spaces.is_determined(space_k): | ||||
|             root_node.append("k_fc", space_k) | ||||
|         if not spaces.is_determined(space_v): | ||||
|             root_node.append("v_fc", space_v) | ||||
|         if self.proj is not None: | ||||
|             space_proj = self.proj.abstract_search_space | ||||
|             if not spaces.is_determined(space_proj): | ||||
|                 root_node.append("proj", space_proj) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperSelfAttention, self).apply_candidate(abstract_child) | ||||
|         if "q_fc" in abstract_child: | ||||
|             self.q_fc.apply_candidate(abstract_child["q_fc"]) | ||||
|         if "k_fc" in abstract_child: | ||||
|             self.k_fc.apply_candidate(abstract_child["k_fc"]) | ||||
|         if "v_fc" in abstract_child: | ||||
|             self.v_fc.apply_candidate(abstract_child["v_fc"]) | ||||
|         if "proj" in abstract_child: | ||||
|             self.proj.apply_candidate(abstract_child["proj"]) | ||||
|  | ||||
|     def forward_qkv(self, input: torch.Tensor, num_head: int) -> torch.Tensor: | ||||
|         B, N, C = input.shape | ||||
|         q = self.q_fc(input) | ||||
|         k = self.k_fc(input) | ||||
|         v = self.v_fc(input) | ||||
|         if num_head > C: | ||||
|             raise ValueError("Invalid num_head [{:}] vs C [{:}]".format(num_head, C)) | ||||
|         head_dim = C // num_head | ||||
|         # process the first [num_head * head_dim] part | ||||
|         q_v1 = ( | ||||
|             q[:, :, : num_head * head_dim] | ||||
|             .reshape(B, N, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         k_v1 = ( | ||||
|             k[:, :, : num_head * head_dim] | ||||
|             .reshape(B, N, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         v_v1 = ( | ||||
|             v[:, :, : num_head * head_dim] | ||||
|             .reshape(B, N, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         attn_v1 = (q_v1 @ k_v1.transpose(-2, -1)) * math.sqrt(head_dim) | ||||
|         if self._use_mask: | ||||
|             mask = torch.triu( | ||||
|                 torch.ones((N, N), dtype=torch.bool, device=input.device), 1 | ||||
|             ) | ||||
|             mask = torch.unsqueeze(torch.unsqueeze(mask, dim=0), dim=0) | ||||
|             attn_v1 = attn_v1.masked_fill(mask, -self._infinity) | ||||
|         attn_v1 = attn_v1.softmax(dim=-1)  # B * #head * N * N | ||||
|         attn_v1 = self.attn_drop(attn_v1) | ||||
|         feats_v1 = (attn_v1 @ v_v1).permute(0, 2, 1, 3).reshape(B, N, -1) | ||||
|         if C == head_dim * num_head: | ||||
|             feats = feats_v1 | ||||
|         else:  # The channels cannot be evenly divided by num_head; the remainder forms an additional head | ||||
|             q_v2 = q[:, :, num_head * head_dim :] | ||||
|             k_v2 = k[:, :, num_head * head_dim :] | ||||
|             v_v2 = v[:, :, num_head * head_dim :] | ||||
|             attn_v2 = (q_v2 @ k_v2.transpose(-2, -1)) * math.sqrt(q_v2.shape[-1]) | ||||
|             attn_v2 = attn_v2.softmax(dim=-1) | ||||
|             attn_v2 = self.attn_drop(attn_v2) | ||||
|             feats_v2 = attn_v2 @ v_v2 | ||||
|             feats = torch.cat([feats_v1, feats_v2], dim=-1) | ||||
|         return feats | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check the num_heads: | ||||
|         if not spaces.is_determined(self._num_heads): | ||||
|             num_heads = self.abstract_child["_num_heads"].value | ||||
|         else: | ||||
|             num_heads = spaces.get_determined_value(self._num_heads) | ||||
|         feats = self.forward_qkv(input, num_heads) | ||||
|         if self.proj is None: | ||||
|             return feats | ||||
|         else: | ||||
|             outs = self.proj(feats) | ||||
|             outs = self.proj_drop(outs) | ||||
|             return outs | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         feats = self.forward_qkv(input, self.num_heads) | ||||
|         if self.proj is None: | ||||
|             return feats | ||||
|         else: | ||||
|             outs = self.proj(feats) | ||||
|             outs = self.proj_drop(outs) | ||||
|             return outs | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return ( | ||||
|             "input_dim={:}, proj_dim={:}, num_heads={:}, mask={:}, infinity={:}".format( | ||||
|                 self._input_dim, | ||||
|                 self._proj_dim, | ||||
|                 self._num_heads, | ||||
|                 self._use_mask, | ||||
|                 self._infinity, | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class SuperQKVAttention(SuperModule): | ||||
|     """The super model for attention layer.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         in_q_dim: IntSpaceType, | ||||
|         in_k_dim: IntSpaceType, | ||||
|         in_v_dim: IntSpaceType, | ||||
|         proj_dim: IntSpaceType, | ||||
|         num_heads: IntSpaceType, | ||||
|         qkv_bias: BoolSpaceType = False, | ||||
|         attn_drop: Optional[float] = None, | ||||
|         proj_drop: Optional[float] = None, | ||||
|     ): | ||||
|         super(SuperQKVAttention, self).__init__() | ||||
|         self._in_v_dim = in_v_dim | ||||
|         self._in_q_dim = in_q_dim | ||||
|         self._in_k_dim = in_k_dim | ||||
|         self._proj_dim = proj_dim | ||||
|         self._num_heads = num_heads | ||||
|         self._qkv_bias = qkv_bias | ||||
|  | ||||
|         self.q_fc = SuperLinear(in_q_dim, proj_dim, bias=qkv_bias) | ||||
|         self.k_fc = SuperLinear(in_k_dim, proj_dim, bias=qkv_bias) | ||||
|         self.v_fc = SuperLinear(in_v_dim, proj_dim, bias=qkv_bias) | ||||
|  | ||||
|         self.attn_drop = nn.Dropout(attn_drop or 0.0) | ||||
|         self.proj = SuperLinear(proj_dim, proj_dim) | ||||
|         self.proj_drop = nn.Dropout(proj_drop or 0.0) | ||||
|         self._infinity = 1e9 | ||||
|  | ||||
|     @property | ||||
|     def num_heads(self): | ||||
|         return spaces.get_max(self._num_heads) | ||||
|  | ||||
|     @property | ||||
|     def in_v_dim(self): | ||||
|         return spaces.get_max(self._in_v_dim) | ||||
|  | ||||
|     @property | ||||
|     def in_q_dim(self): | ||||
|         return spaces.get_max(self._in_q_dim) | ||||
|  | ||||
|     @property | ||||
|     def in_k_dim(self): | ||||
|         return spaces.get_max(self._in_k_dim) | ||||
|  | ||||
|     @property | ||||
|     def proj_dim(self): | ||||
|         return spaces.get_max(self._proj_dim) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         space_q = self.q_fc.abstract_search_space | ||||
|         space_k = self.k_fc.abstract_search_space | ||||
|         space_v = self.v_fc.abstract_search_space | ||||
|         space_proj = self.proj.abstract_search_space | ||||
|         if not spaces.is_determined(self._num_heads): | ||||
|             root_node.append("_num_heads", self._num_heads.abstract(reuse_last=True)) | ||||
|         if not spaces.is_determined(space_q): | ||||
|             root_node.append("q_fc", space_q) | ||||
|         if not spaces.is_determined(space_k): | ||||
|             root_node.append("k_fc", space_k) | ||||
|         if not spaces.is_determined(space_v): | ||||
|             root_node.append("v_fc", space_v) | ||||
|         if not spaces.is_determined(space_proj): | ||||
|             root_node.append("proj", space_proj) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperQKVAttention, self).apply_candidate(abstract_child) | ||||
|         if "q_fc" in abstract_child: | ||||
|             self.q_fc.apply_candidate(abstract_child["q_fc"]) | ||||
|         if "k_fc" in abstract_child: | ||||
|             self.k_fc.apply_candidate(abstract_child["k_fc"]) | ||||
|         if "v_fc" in abstract_child: | ||||
|             self.v_fc.apply_candidate(abstract_child["v_fc"]) | ||||
|         if "proj" in abstract_child: | ||||
|             self.proj.apply_candidate(abstract_child["proj"]) | ||||
|  | ||||
|     def forward_qkv( | ||||
|         self, q_tensor, k_tensor, v_tensor, num_head: int, mask=None | ||||
|     ) -> torch.Tensor: | ||||
|         q = self.q_fc(q_tensor) | ||||
|         B, N, C = q.shape | ||||
|  | ||||
|         k = self.k_fc(k_tensor) | ||||
|         B0, S, _ = k.shape | ||||
|  | ||||
|         v = self.v_fc(v_tensor) | ||||
|         assert B0 == v.shape[0] and S == v.shape[1] | ||||
|  | ||||
|         head_dim = C // num_head | ||||
|         if num_head > C: | ||||
|             raise ValueError("Invalid num_head [{:}] vs C [{:}]".format(num_head, C)) | ||||
|         q_v1 = ( | ||||
|             q[:, :, : num_head * head_dim] | ||||
|             .reshape(B, N, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         k_v1 = ( | ||||
|             k[:, :, : num_head * head_dim] | ||||
|             .reshape(B0, S, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         # compute the attention map | ||||
|         attn_v1 = (q_v1 @ k_v1.transpose(-2, -1)) * math.sqrt(head_dim) | ||||
|         if mask is not None: | ||||
|             mask = torch.unsqueeze(mask, dim=1) | ||||
|             attn_v1 = attn_v1.masked_fill(mask, -self._infinity) | ||||
|         attn_v1 = attn_v1.softmax(dim=-1)  # B * #head * N * S | ||||
|         attn_v1 = self.attn_drop(attn_v1) | ||||
|  | ||||
|         v_v1 = ( | ||||
|             v[:, :, : num_head * head_dim] | ||||
|             .reshape(B0, S, num_head, head_dim) | ||||
|             .permute(0, 2, 1, 3) | ||||
|         ) | ||||
|         feats_v1 = (attn_v1 @ v_v1).permute(0, 2, 1, 3).reshape(B, N, -1) | ||||
|         # process the first [num_head * head_dim] part | ||||
|         if C == head_dim * num_head: | ||||
|             feats = feats_v1 | ||||
|         else:  # The channels cannot be evenly divided by num_head; the remainder forms an additional head | ||||
|             # [might have bugs, did not check yet] | ||||
|             q_v2 = q[:, :, num_head * head_dim :] | ||||
|             k_v2 = k[:, :, num_head * head_dim :] | ||||
|             v_v2 = v[:, :, num_head * head_dim :] | ||||
|             attn_v2 = (q_v2 @ k_v2.transpose(-2, -1)) * math.sqrt(q_v2.shape[-1]) | ||||
|             attn_v2 = attn_v2.softmax(dim=-1) | ||||
|             attn_v2 = self.attn_drop(attn_v2) | ||||
|             feats_v2 = attn_v2 @ v_v2 | ||||
|             feats = torch.cat([feats_v1, feats_v2], dim=-1) | ||||
|         return feats | ||||
|  | ||||
|     def forward_candidate( | ||||
|         self, q_tensor, k_tensor, v_tensor, mask=None | ||||
|     ) -> torch.Tensor: | ||||
|         # check the num_heads: | ||||
|         if not spaces.is_determined(self._num_heads): | ||||
|             num_heads = self.abstract_child["_num_heads"].value | ||||
|         else: | ||||
|             num_heads = spaces.get_determined_value(self._num_heads) | ||||
|         feats = self.forward_qkv(q_tensor, k_tensor, v_tensor, num_heads, mask) | ||||
|         outs = self.proj(feats) | ||||
|         outs = self.proj_drop(outs) | ||||
|         return outs | ||||
|  | ||||
|     def forward_raw(self, q_tensor, k_tensor, v_tensor, mask=None) -> torch.Tensor: | ||||
|         feats = self.forward_qkv(q_tensor, k_tensor, v_tensor, self.num_heads, mask) | ||||
|         outs = self.proj(feats) | ||||
|         outs = self.proj_drop(outs) | ||||
|         return outs | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "input_dim={:}, proj_dim={:}, num_heads={:}, infinity={:}".format( | ||||
|             (self.in_q_dim, self.in_k_dim, self.in_v_dim), | ||||
|             self._proj_dim, | ||||
|             self._num_heads, | ||||
|             self._infinity, | ||||
|         ) | ||||
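A shape-level sketch of SuperSelfAttention (illustrative, not from the diff; it assumes plain Python ints are accepted as IntSpaceType values, so spaces.get_max/get_min return them unchanged, and that the default run mode of the sub-modules dispatches to forward_raw):

    import torch
    from xautodl.xlayers.super_attention import SuperSelfAttention

    # Hypothetical sizes: 64 channels split over 4 heads, projected down to 32.
    attn = SuperSelfAttention(input_dim=64, proj_dim=32, num_heads=4, qkv_bias=True)
    x = torch.randn(2, 5, 64)   # (batch, tokens, channels)
    out = attn.forward_raw(x)   # raw mode uses the maximum candidate sizes
    print(out.shape)            # torch.Size([2, 5, 32])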
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_attention_v2.py (new normal file, 113 lines)
							| @@ -0,0 +1,113 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import math | ||||
| from typing import Optional, Text | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
| from .super_linear import SuperLinear | ||||
|  | ||||
|  | ||||
| class SuperQKVAttentionV2(SuperModule): | ||||
|     """The super model for attention layer.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         qk_att_dim: int, | ||||
|         in_v_dim: int, | ||||
|         hidden_dim: int, | ||||
|         num_heads: int, | ||||
|         proj_dim: int, | ||||
|         qkv_bias: bool = False, | ||||
|         attn_drop: Optional[float] = None, | ||||
|         proj_drop: Optional[float] = None, | ||||
|     ): | ||||
|         super(SuperQKVAttentionV2, self).__init__() | ||||
|         self._in_v_dim = in_v_dim | ||||
|         self._qk_att_dim = qk_att_dim | ||||
|         self._proj_dim = proj_dim | ||||
|         self._hidden_dim = hidden_dim | ||||
|         self._num_heads = num_heads | ||||
|         self._qkv_bias = qkv_bias | ||||
|  | ||||
|         self.qk_fc = SuperLinear(qk_att_dim, num_heads, bias=qkv_bias) | ||||
|         self.v_fc = SuperLinear(in_v_dim, hidden_dim * num_heads, bias=qkv_bias) | ||||
|  | ||||
|         self.attn_drop = nn.Dropout(attn_drop or 0.0) | ||||
|         self.proj = SuperLinear(hidden_dim * num_heads, proj_dim) | ||||
|         self.proj_drop = nn.Dropout(proj_drop or 0.0) | ||||
|         self._infinity = 1e9 | ||||
|  | ||||
|     @property | ||||
|     def num_heads(self): | ||||
|         return spaces.get_max(self._num_heads) | ||||
|  | ||||
|     @property | ||||
|     def in_v_dim(self): | ||||
|         return spaces.get_max(self._in_v_dim) | ||||
|  | ||||
|     @property | ||||
|     def qk_att_dim(self): | ||||
|         return spaces.get_max(self._qk_att_dim) | ||||
|  | ||||
|     @property | ||||
|     def hidden_dim(self): | ||||
|         return spaces.get_max(self._hidden_dim) | ||||
|  | ||||
|     @property | ||||
|     def proj_dim(self): | ||||
|         return spaces.get_max(self._proj_dim) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperQKVAttentionV2, self).apply_candidate(abstract_child) | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     def forward_qkv( | ||||
|         self, qk_att_tensor, v_tensor, num_head: int, mask=None | ||||
|     ) -> torch.Tensor: | ||||
|         qk_att = self.qk_fc(qk_att_tensor) | ||||
|         B, N, S, _ = qk_att.shape | ||||
|         assert _ == num_head | ||||
|         attn_v1 = qk_att.permute(0, 3, 1, 2) | ||||
|         if mask is not None: | ||||
|             mask = torch.unsqueeze(mask, dim=1) | ||||
|             attn_v1 = attn_v1.masked_fill(mask, -self._infinity) | ||||
|         attn_v1 = attn_v1.softmax(dim=-1)  # B * #head * N * S | ||||
|         attn_v1 = self.attn_drop(attn_v1) | ||||
|  | ||||
|         v = self.v_fc(v_tensor) | ||||
|         B0, _, _ = v.shape | ||||
|         v_v1 = v.reshape(B0, S, num_head, -1).permute(0, 2, 1, 3) | ||||
|         feats_v1 = (attn_v1 @ v_v1).permute(0, 2, 1, 3).reshape(B, N, -1) | ||||
|         return feats_v1 | ||||
|  | ||||
|     def forward_candidate(self, qk_att_tensor, v_tensor, mask=None) -> torch.Tensor: | ||||
|         return self.forward_raw(qk_att_tensor, v_tensor, mask) | ||||
|  | ||||
|     def forward_raw(self, qk_att_tensor, v_tensor, mask=None) -> torch.Tensor: | ||||
|         feats = self.forward_qkv(qk_att_tensor, v_tensor, self.num_heads, mask) | ||||
|         outs = self.proj(feats) | ||||
|         outs = self.proj_drop(outs) | ||||
|         return outs | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "input_dim={:}, hidden_dim={:}, proj_dim={:}, num_heads={:}, infinity={:}".format( | ||||
|             (self.qk_att_dim, self.in_v_dim), | ||||
|             self._hidden_dim, | ||||
|             self._proj_dim, | ||||
|             self._num_heads, | ||||
|             self._infinity, | ||||
|         ) | ||||
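A shape-level sketch of SuperQKVAttentionV2, which consumes a precomputed query-key interaction tensor instead of raw queries and keys (illustrative, not from the diff; the sizes are hypothetical and plain ints are assumed to be valid space values):

    import torch
    from xautodl.xlayers.super_attention_v2 import SuperQKVAttentionV2

    attn = SuperQKVAttentionV2(qk_att_dim=7, in_v_dim=24, hidden_dim=16,
                               num_heads=4, proj_dim=32, qkv_bias=True)
    qk = torch.randn(2, 5, 9, 7)   # (batch, queries, keys, qk_att_dim)
    v = torch.randn(2, 9, 24)      # (batch, keys, in_v_dim)
    out = attn.forward_raw(qk, v)  # qk_fc maps the last dim to one logit per head
    print(out.shape)               # torch.Size([2, 5, 32])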
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_container.py (new normal file, 120 lines)
							| @@ -0,0 +1,120 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import torch | ||||
|  | ||||
| from itertools import islice | ||||
| import operator | ||||
|  | ||||
| from collections import OrderedDict | ||||
| from typing import Optional, Union, Callable, TypeVar, Iterator | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
|  | ||||
|  | ||||
| T = TypeVar("T", bound=SuperModule) | ||||
|  | ||||
|  | ||||
| class SuperSequential(SuperModule): | ||||
|     """A sequential container wrapped with 'Super' ability. | ||||
|  | ||||
|     Modules will be added to it in the order they are passed in the constructor. | ||||
|     Alternatively, an ordered dict of modules can also be passed in. | ||||
|     To make it easier to understand, here is a small example:: | ||||
|         # Example of using Sequential | ||||
|         model = SuperSequential( | ||||
|                   nn.Conv2d(1,20,5), | ||||
|                   nn.ReLU(), | ||||
|                   nn.Conv2d(20,64,5), | ||||
|                   nn.ReLU() | ||||
|                 ) | ||||
|         # Example of using Sequential with OrderedDict | ||||
|         model = SuperSequential(OrderedDict([ | ||||
|                   ('conv1', nn.Conv2d(1,20,5)), | ||||
|                   ('relu1', nn.ReLU()), | ||||
|                   ('conv2', nn.Conv2d(20,64,5)), | ||||
|                   ('relu2', nn.ReLU()) | ||||
|                 ])) | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, *args): | ||||
|         super(SuperSequential, self).__init__() | ||||
|         if len(args) == 1 and isinstance(args[0], OrderedDict): | ||||
|             for key, module in args[0].items(): | ||||
|                 self.add_module(key, module) | ||||
|         else: | ||||
|             if not isinstance(args, (list, tuple)): | ||||
|                 raise ValueError("Invalid input type: {:}".format(type(args))) | ||||
|             for idx, module in enumerate(args): | ||||
|                 self.add_module(str(idx), module) | ||||
|  | ||||
|     def _get_item_by_idx(self, iterator, idx) -> T: | ||||
|         """Get the idx-th item of the iterator""" | ||||
|         size = len(self) | ||||
|         idx = operator.index(idx) | ||||
|         if not -size <= idx < size: | ||||
|             raise IndexError("index {} is out of range".format(idx)) | ||||
|         idx %= size | ||||
|         return next(islice(iterator, idx, None)) | ||||
|  | ||||
|     def __getitem__(self, idx) -> Union["SuperSequential", T]: | ||||
|         if isinstance(idx, slice): | ||||
|             return self.__class__(OrderedDict(list(self._modules.items())[idx])) | ||||
|         else: | ||||
|             return self._get_item_by_idx(self._modules.values(), idx) | ||||
|  | ||||
|     def __setitem__(self, idx: int, module: SuperModule) -> None: | ||||
|         key: str = self._get_item_by_idx(self._modules.keys(), idx) | ||||
|         return setattr(self, key, module) | ||||
|  | ||||
|     def __delitem__(self, idx: Union[slice, int]) -> None: | ||||
|         if isinstance(idx, slice): | ||||
|             for key in list(self._modules.keys())[idx]: | ||||
|                 delattr(self, key) | ||||
|         else: | ||||
|             key = self._get_item_by_idx(self._modules.keys(), idx) | ||||
|             delattr(self, key) | ||||
|  | ||||
|     def __len__(self) -> int: | ||||
|         return len(self._modules) | ||||
|  | ||||
|     def __dir__(self): | ||||
|         keys = super(SuperSequential, self).__dir__() | ||||
|         keys = [key for key in keys if not key.isdigit()] | ||||
|         return keys | ||||
|  | ||||
|     def __iter__(self) -> Iterator[SuperModule]: | ||||
|         return iter(self._modules.values()) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         for index, module in enumerate(self): | ||||
|             if not isinstance(module, SuperModule): | ||||
|                 continue | ||||
|             space = module.abstract_search_space | ||||
|             if not spaces.is_determined(space): | ||||
|                 root_node.append(str(index), space) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperSequential, self).apply_candidate(abstract_child) | ||||
|         for index, module in enumerate(self): | ||||
|             if str(index) in abstract_child: | ||||
|                 module.apply_candidate(abstract_child[str(index)]) | ||||
|  | ||||
|     def forward_candidate(self, input): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input): | ||||
|         for module in self: | ||||
|             input = module(input) | ||||
|         return input | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         for index, module in enumerate(self): | ||||
|             input = module.forward_with_container( | ||||
|                 input, container, prefix + [str(index)] | ||||
|             ) | ||||
|         return input | ||||
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_core.py (new normal file, 51 lines)
							| @@ -0,0 +1,51 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| from .super_module import SuperRunMode | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import LayerOrder | ||||
|  | ||||
| from .super_module import SuperModule | ||||
| from .super_container import SuperSequential | ||||
| from .super_linear import SuperLinear | ||||
| from .super_linear import SuperMLPv1, SuperMLPv2 | ||||
|  | ||||
| from .super_norm import SuperSimpleNorm | ||||
| from .super_norm import SuperLayerNorm1D | ||||
| from .super_norm import SuperSimpleLearnableNorm | ||||
| from .super_norm import SuperIdentity | ||||
| from .super_dropout import SuperDropout | ||||
| from .super_dropout import SuperDrop | ||||
|  | ||||
| super_name2norm = { | ||||
|     "simple_norm": SuperSimpleNorm, | ||||
|     "simple_learn_norm": SuperSimpleLearnableNorm, | ||||
|     "layer_norm_1d": SuperLayerNorm1D, | ||||
|     "identity": SuperIdentity, | ||||
| } | ||||
|  | ||||
| from .super_attention import SuperSelfAttention | ||||
| from .super_attention import SuperQKVAttention | ||||
| from .super_attention_v2 import SuperQKVAttentionV2 | ||||
| from .super_transformer import SuperTransformerEncoderLayer | ||||
|  | ||||
| from .super_activations import SuperReLU | ||||
| from .super_activations import SuperLeakyReLU | ||||
| from .super_activations import SuperTanh | ||||
| from .super_activations import SuperGELU | ||||
| from .super_activations import SuperSigmoid | ||||
|  | ||||
| super_name2activation = { | ||||
|     "relu": SuperReLU, | ||||
|     "sigmoid": SuperSigmoid, | ||||
|     "gelu": SuperGELU, | ||||
|     "leaky_relu": SuperLeakyReLU, | ||||
|     "tanh": SuperTanh, | ||||
| } | ||||
|  | ||||
|  | ||||
| from .super_trade_stem import SuperAlphaEBDv1 | ||||
| from .super_positional_embedding import SuperDynamicPositionE | ||||
| from .super_positional_embedding import SuperPositionalEncoder | ||||
|  | ||||
| from .super_rearrange import SuperReArrange | ||||
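The registries exported here allow building layers by name. A small sketch (illustrative, not from the diff; plain ints are assumed to be valid space values for SuperLinear):

    from xautodl.xlayers import super_core

    act = super_core.super_name2activation["gelu"]()        # a SuperGELU instance
    norm_cls = super_core.super_name2norm["layer_norm_1d"]  # the SuperLayerNorm1D class
    mlp = super_core.SuperSequential(
        super_core.SuperLinear(16, 32),
        act,
        super_core.SuperLinear(32, 8),
    )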
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_dropout.py (new normal file, 83 lines)
							| @@ -0,0 +1,83 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| import math | ||||
| from typing import Optional, Callable, Tuple | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
|  | ||||
|  | ||||
| class SuperDropout(SuperModule): | ||||
|     """Applies a the dropout function element-wise.""" | ||||
|  | ||||
|     def __init__(self, p: float = 0.5, inplace: bool = False) -> None: | ||||
|         super(SuperDropout, self).__init__() | ||||
|         self._p = p | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.dropout(input, self._p, self.training, self._inplace) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         xstr = "inplace=True" if self._inplace else "" | ||||
|         return "p={:}".format(self._p) + ", " + xstr | ||||
|  | ||||
|  | ||||
| class SuperDrop(SuperModule): | ||||
|     """Applies a the drop-path function element-wise.""" | ||||
|  | ||||
|     def __init__(self, p: float, dims: Tuple[int], recover: bool = True) -> None: | ||||
|         super(SuperDrop, self).__init__() | ||||
|         self._p = p | ||||
|         self._dims = dims | ||||
|         self._recover = recover | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         if not self.training or self._p <= 0: | ||||
|             return input | ||||
|         keep_prob = 1 - self._p | ||||
|         shape = [input.shape[0]] + [ | ||||
|             x if y == -1 else y for x, y in zip(input.shape[1:], self._dims) | ||||
|         ] | ||||
|         random_tensor = keep_prob + torch.rand( | ||||
|             shape, dtype=input.dtype, device=input.device | ||||
|         ) | ||||
|         random_tensor.floor_()  # binarize | ||||
|         if self._recover: | ||||
|             return input.div(keep_prob) * random_tensor | ||||
|         else: | ||||
|             return input * random_tensor  # as masks | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return ( | ||||
|             "p={:}".format(self._p) | ||||
|             + ", dims={:}".format(self._dims) | ||||
|             + ", recover={:}".format(self._recover) | ||||
|         ) | ||||
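The dims argument of SuperDrop controls which trailing dimensions share a single mask value (entry 1) and which keep their full size (entry -1): SuperSelfAttention above passes all -1 for element-wise attention dropout, while all 1 gives per-sample drop-path. A sketch (illustrative, not from the diff):

    import torch
    from xautodl.xlayers.super_dropout import SuperDrop

    x = torch.randn(8, 5, 32)                  # (batch, tokens, channels)
    drop_path = SuperDrop(p=0.2, dims=(1, 1), recover=True)
    drop_path.train()                          # the mask is only applied in training mode
    y = drop_path.forward_raw(x)
    # The mask has shape (8, 1, 1): whole samples are zeroed and the survivors
    # are rescaled by 1 / (1 - p) because recover=True.
    print(y.shape)                             # torch.Size([8, 5, 32])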
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_linear.py (new normal file, 310 lines)
							| @@ -0,0 +1,310 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| import math | ||||
| from typing import Optional, Callable | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
|  | ||||
|  | ||||
| class SuperLinear(SuperModule): | ||||
|     """Applies a linear transformation to the incoming data: :math:`y = xA^T + b`""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         in_features: IntSpaceType, | ||||
|         out_features: IntSpaceType, | ||||
|         bias: BoolSpaceType = True, | ||||
|     ) -> None: | ||||
|         super(SuperLinear, self).__init__() | ||||
|  | ||||
|         # the raw input args | ||||
|         self._in_features = in_features | ||||
|         self._out_features = out_features | ||||
|         self._bias = bias | ||||
|         # weights to be optimized | ||||
|         self.register_parameter( | ||||
|             "_super_weight", | ||||
|             torch.nn.Parameter(torch.Tensor(self.out_features, self.in_features)), | ||||
|         ) | ||||
|         if self.bias: | ||||
|             self.register_parameter( | ||||
|                 "_super_bias", torch.nn.Parameter(torch.Tensor(self.out_features)) | ||||
|             ) | ||||
|         else: | ||||
|             self.register_parameter("_super_bias", None) | ||||
|         self.reset_parameters() | ||||
|  | ||||
|     @property | ||||
|     def in_features(self): | ||||
|         return spaces.get_max(self._in_features) | ||||
|  | ||||
|     @property | ||||
|     def out_features(self): | ||||
|         return spaces.get_max(self._out_features) | ||||
|  | ||||
|     @property | ||||
|     def bias(self): | ||||
|         return spaces.has_categorical(self._bias, True) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         if not spaces.is_determined(self._in_features): | ||||
|             root_node.append( | ||||
|                 "_in_features", self._in_features.abstract(reuse_last=True) | ||||
|             ) | ||||
|         if not spaces.is_determined(self._out_features): | ||||
|             root_node.append( | ||||
|                 "_out_features", self._out_features.abstract(reuse_last=True) | ||||
|             ) | ||||
|         if not spaces.is_determined(self._bias): | ||||
|             root_node.append("_bias", self._bias.abstract(reuse_last=True)) | ||||
|         return root_node | ||||
|  | ||||
|     def reset_parameters(self) -> None: | ||||
|         nn.init.kaiming_uniform_(self._super_weight, a=math.sqrt(5)) | ||||
|         if self.bias: | ||||
|             fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self._super_weight) | ||||
|             bound = 1 / math.sqrt(fan_in) | ||||
|             nn.init.uniform_(self._super_bias, -bound, bound) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         if not spaces.is_determined(self._in_features): | ||||
|             expected_input_dim = self.abstract_child["_in_features"].value | ||||
|         else: | ||||
|             expected_input_dim = spaces.get_determined_value(self._in_features) | ||||
|         if input.size(-1) != expected_input_dim: | ||||
|             raise ValueError( | ||||
|                 "Expect the input dim of {:} instead of {:}".format( | ||||
|                     expected_input_dim, input.size(-1) | ||||
|                 ) | ||||
|             ) | ||||
|         # create the weight matrix | ||||
|         if not spaces.is_determined(self._out_features): | ||||
|             out_dim = self.abstract_child["_out_features"].value | ||||
|         else: | ||||
|             out_dim = spaces.get_determined_value(self._out_features) | ||||
|         candidate_weight = self._super_weight[:out_dim, :expected_input_dim] | ||||
|         # create the bias matrix | ||||
|         if not spaces.is_determined(self._bias): | ||||
|             if self.abstract_child["_bias"].value: | ||||
|                 candidate_bias = self._super_bias[:out_dim] | ||||
|             else: | ||||
|                 candidate_bias = None | ||||
|         else: | ||||
|             if spaces.get_determined_value(self._bias): | ||||
|                 candidate_bias = self._super_bias[:out_dim] | ||||
|             else: | ||||
|                 candidate_bias = None | ||||
|         return F.linear(input, candidate_weight, candidate_bias) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.linear(input, self._super_weight, self._super_bias) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "in_features={:}, out_features={:}, bias={:}".format( | ||||
|             self._in_features, self._out_features, self._bias | ||||
|         ) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         super_weight_name = ".".join(prefix + ["_super_weight"]) | ||||
|         super_weight = container.query(super_weight_name) | ||||
|         super_bias_name = ".".join(prefix + ["_super_bias"]) | ||||
|         if container.has(super_bias_name): | ||||
|             super_bias = container.query(super_bias_name) | ||||
|         else: | ||||
|             super_bias = None | ||||
|         return F.linear(input, super_weight, super_bias) | ||||
|  | ||||
|  | ||||
| class SuperMLPv1(SuperModule): | ||||
|     """An MLP layer: FC -> Activation -> Drop -> FC -> Drop.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         in_features: IntSpaceType, | ||||
|         hidden_features: IntSpaceType, | ||||
|         out_features: IntSpaceType, | ||||
|         act_layer: Callable[[], nn.Module] = nn.GELU, | ||||
|         drop: Optional[float] = None, | ||||
|     ): | ||||
|         super(SuperMLPv1, self).__init__() | ||||
|         self._in_features = in_features | ||||
|         self._hidden_features = hidden_features | ||||
|         self._out_features = out_features | ||||
|         self._drop_rate = drop | ||||
|         self.fc1 = SuperLinear(in_features, hidden_features) | ||||
|         self.act = act_layer() | ||||
|         self.fc2 = SuperLinear(hidden_features, out_features) | ||||
|         self.drop = nn.Dropout(drop or 0.0) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         space_fc1 = self.fc1.abstract_search_space | ||||
|         space_fc2 = self.fc2.abstract_search_space | ||||
|         if not spaces.is_determined(space_fc1): | ||||
|             root_node.append("fc1", space_fc1) | ||||
|         if not spaces.is_determined(space_fc2): | ||||
|             root_node.append("fc2", space_fc2) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperMLPv1, self).apply_candidate(abstract_child) | ||||
|         if "fc1" in abstract_child: | ||||
|             self.fc1.apply_candidate(abstract_child["fc1"]) | ||||
|         if "fc2" in abstract_child: | ||||
|             self.fc2.apply_candidate(abstract_child["fc2"]) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         x = self.fc1(input) | ||||
|         x = self.act(x) | ||||
|         x = self.drop(x) | ||||
|         x = self.fc2(x) | ||||
|         x = self.drop(x) | ||||
|         return x | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "in_features={:}, hidden_features={:}, out_features={:}, drop={:}, fc1 -> act -> drop -> fc2 -> drop,".format( | ||||
|             self._in_features, | ||||
|             self._hidden_features, | ||||
|             self._out_features, | ||||
|             self._drop_rate, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class SuperMLPv2(SuperModule): | ||||
|     """An MLP layer: FC -> Activation -> Drop -> FC -> Drop.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         in_features: IntSpaceType, | ||||
|         hidden_multiplier: IntSpaceType, | ||||
|         out_features: IntSpaceType, | ||||
|         act_layer: Callable[[], nn.Module] = nn.GELU, | ||||
|         drop: Optional[float] = None, | ||||
|     ): | ||||
|         super(SuperMLPv2, self).__init__() | ||||
|         self._in_features = in_features | ||||
|         self._hidden_multiplier = hidden_multiplier | ||||
|         self._out_features = out_features | ||||
|         self._drop_rate = drop | ||||
|  | ||||
|         self._create_linear( | ||||
|             "fc1", self.in_features, int(self.in_features * self.hidden_multiplier) | ||||
|         ) | ||||
|         self._create_linear( | ||||
|             "fc2", int(self.in_features * self.hidden_multiplier), self.out_features | ||||
|         ) | ||||
|         self.act = act_layer() | ||||
|         self.drop = nn.Dropout(drop or 0.0) | ||||
|         self.reset_parameters() | ||||
|  | ||||
|     @property | ||||
|     def in_features(self): | ||||
|         return spaces.get_max(self._in_features) | ||||
|  | ||||
|     @property | ||||
|     def hidden_multiplier(self): | ||||
|         return spaces.get_max(self._hidden_multiplier) | ||||
|  | ||||
|     @property | ||||
|     def out_features(self): | ||||
|         return spaces.get_max(self._out_features) | ||||
|  | ||||
|     def _create_linear(self, name, inC, outC): | ||||
|         self.register_parameter( | ||||
|             "{:}_super_weight".format(name), torch.nn.Parameter(torch.Tensor(outC, inC)) | ||||
|         ) | ||||
|         self.register_parameter( | ||||
|             "{:}_super_bias".format(name), torch.nn.Parameter(torch.Tensor(outC)) | ||||
|         ) | ||||
|  | ||||
|     def reset_parameters(self) -> None: | ||||
|         nn.init.kaiming_uniform_(self.fc1_super_weight, a=math.sqrt(5)) | ||||
|         nn.init.kaiming_uniform_(self.fc2_super_weight, a=math.sqrt(5)) | ||||
|         fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.fc1_super_weight) | ||||
|         bound = 1 / math.sqrt(fan_in) | ||||
|         nn.init.uniform_(self.fc1_super_bias, -bound, bound) | ||||
|         fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.fc2_super_weight) | ||||
|         bound = 1 / math.sqrt(fan_in) | ||||
|         nn.init.uniform_(self.fc2_super_bias, -bound, bound) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         if not spaces.is_determined(self._in_features): | ||||
|             root_node.append( | ||||
|                 "_in_features", self._in_features.abstract(reuse_last=True) | ||||
|             ) | ||||
|         if not spaces.is_determined(self._hidden_multiplier): | ||||
|             root_node.append( | ||||
|                 "_hidden_multiplier", self._hidden_multiplier.abstract(reuse_last=True) | ||||
|             ) | ||||
|         if not spaces.is_determined(self._out_features): | ||||
|             root_node.append( | ||||
|                 "_out_features", self._out_features.abstract(reuse_last=True) | ||||
|             ) | ||||
|         return root_node | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         if not spaces.is_determined(self._in_features): | ||||
|             expected_input_dim = self.abstract_child["_in_features"].value | ||||
|         else: | ||||
|             expected_input_dim = spaces.get_determined_value(self._in_features) | ||||
|         if input.size(-1) != expected_input_dim: | ||||
|             raise ValueError( | ||||
|                 "Expect the input dim of {:} instead of {:}".format( | ||||
|                     expected_input_dim, input.size(-1) | ||||
|                 ) | ||||
|             ) | ||||
|         # create the weight and bias matrix for fc1 | ||||
|         if not spaces.is_determined(self._hidden_multiplier): | ||||
|             hmul = self.abstract_child["_hidden_multiplier"].value | ||||
|         else: | ||||
|             hmul = spaces.get_determined_value(self._hidden_multiplier) | ||||
|         hidden_dim = int(expected_input_dim * hmul) | ||||
|         _fc1_weight = self.fc1_super_weight[:hidden_dim, :expected_input_dim] | ||||
|         _fc1_bias = self.fc1_super_bias[:hidden_dim] | ||||
|         x = F.linear(input, _fc1_weight, _fc1_bias) | ||||
|         x = self.act(x) | ||||
|         x = self.drop(x) | ||||
|         # create the weight and bias matrix for fc2 | ||||
|         if not spaces.is_determined(self._out_features): | ||||
|             out_dim = self.abstract_child["_out_features"].value | ||||
|         else: | ||||
|             out_dim = spaces.get_determined_value(self._out_features) | ||||
|         _fc2_weight = self.fc2_super_weight[:out_dim, :hidden_dim] | ||||
|         _fc2_bias = self.fc2_super_bias[:out_dim] | ||||
|         x = F.linear(x, _fc2_weight, _fc2_bias) | ||||
|         x = self.drop(x) | ||||
|         return x | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         x = F.linear(input, self.fc1_super_weight, self.fc1_super_bias) | ||||
|         x = self.act(x) | ||||
|         x = self.drop(x) | ||||
|         x = F.linear(x, self.fc2_super_weight, self.fc2_super_bias) | ||||
|         x = self.drop(x) | ||||
|         return x | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "in_features={:}, hidden_multiplier={:}, out_features={:}, drop={:}, fc1 -> act -> drop -> fc2 -> drop,".format( | ||||
|             self._in_features, | ||||
|             self._hidden_multiplier, | ||||
|             self._out_features, | ||||
|             self._drop_rate, | ||||
|         ) | ||||
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_module.py (new file, 227 lines)
							| @@ -0,0 +1,227 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
|  | ||||
| import os | ||||
| from pathlib import Path | ||||
| import abc | ||||
| import tempfile | ||||
| import warnings | ||||
| from typing import Optional, Union, Callable | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| from enum import Enum | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_utils import IntSpaceType, BoolSpaceType | ||||
| from .super_utils import LayerOrder, SuperRunMode | ||||
| from .super_utils import TensorContainer | ||||
| from .super_utils import ShapeContainer | ||||
|  | ||||
| BEST_DIR_KEY = "best_model_dir" | ||||
| BEST_NAME_KEY = "best_model_name" | ||||
| BEST_SCORE_KEY = "best_model_score" | ||||
| ENABLE_CANDIDATE = 0 | ||||
| DISABLE_CANDIDATE = 1 | ||||
|  | ||||
|  | ||||
| class SuperModule(abc.ABC, nn.Module): | ||||
|     """This class equips the nn.Module class with the ability to apply AutoDL.""" | ||||
|  | ||||
|     def __init__(self): | ||||
|         super(SuperModule, self).__init__() | ||||
|         self._super_run_type = SuperRunMode.Default | ||||
|         self._abstract_child = None | ||||
|         self._verbose = False | ||||
|         self._meta_info = {} | ||||
|         self._candidate_mode = DISABLE_CANDIDATE | ||||
|  | ||||
|     def set_super_run_type(self, super_run_type): | ||||
|         def _reset_super_run(m): | ||||
|             if isinstance(m, SuperModule): | ||||
|                 m._super_run_type = super_run_type | ||||
|  | ||||
|         self.apply(_reset_super_run) | ||||
|  | ||||
|     def add_module(self, name: str, module: Optional[torch.nn.Module]) -> None: | ||||
|         if not isinstance(module, SuperModule): | ||||
|             warnings.warn( | ||||
|                 "Add {:}:{:} module, which is not SuperModule, into {:}".format( | ||||
|                     name, module.__class__.__name__, self.__class__.__name__ | ||||
|                 ) | ||||
|                 + "\n" | ||||
|                 + "It may cause some functions invalid." | ||||
|             ) | ||||
|         super(SuperModule, self).add_module(name, module) | ||||
|  | ||||
|     def apply_verbose(self, verbose): | ||||
|         def _reset_verbose(m): | ||||
|             if isinstance(m, SuperModule): | ||||
|                 m._verbose = verbose | ||||
|  | ||||
|         self.apply(_reset_verbose) | ||||
|  | ||||
|     def apply_candidate(self, abstract_child): | ||||
|         if not isinstance(abstract_child, spaces.VirtualNode): | ||||
|             raise ValueError( | ||||
|                 "Invalid abstract child program: {:}".format(abstract_child) | ||||
|             ) | ||||
|         self._abstract_child = abstract_child | ||||
|  | ||||
|     def enable_candidate(self): | ||||
|         def _enable_candidate(m): | ||||
|             if isinstance(m, SuperModule): | ||||
|                 m._candidate_mode = ENABLE_CANDIDATE | ||||
|  | ||||
|         self.apply(_enable_candidate) | ||||
|  | ||||
|     def disable_candidate(self): | ||||
|         def _disable_candidate(m): | ||||
|             if isinstance(m, SuperModule): | ||||
|                 m._candidate_mode = DISABLE_CANDIDATE | ||||
|  | ||||
|         self.apply(_disable_candidate) | ||||
|  | ||||
|     def get_w_container(self): | ||||
|         container = TensorContainer() | ||||
|         for name, param in self.named_parameters(): | ||||
|             container.append(name, param, True) | ||||
|         for name, buf in self.named_buffers(): | ||||
|             container.append(name, buf, False) | ||||
|         return container | ||||
|  | ||||
|     def analyze_weights(self): | ||||
|         with torch.no_grad(): | ||||
|             for name, param in self.named_parameters(): | ||||
|                 shapestr = "[{:10s}] shape={:}".format(name, list(param.shape)) | ||||
|                 finalstr = shapestr + " {:.2f} +- {:.2f}".format( | ||||
|                     param.mean(), param.std() | ||||
|                 ) | ||||
|                 print(finalstr) | ||||
|  | ||||
|     def numel(self, buffer=True): | ||||
|         total = 0 | ||||
|         for name, param in self.named_parameters(): | ||||
|             total += param.numel() | ||||
|         if buffer: | ||||
|             for name, buf in self.named_buffers(): | ||||
|                 total += buf.numel() | ||||
|         return total | ||||
|  | ||||
|     def set_best_dir(self, xdir): | ||||
|         self._meta_info[BEST_DIR_KEY] = str(xdir) | ||||
|         Path(xdir).mkdir(parents=True, exist_ok=True) | ||||
|  | ||||
|     def set_best_name(self, xname): | ||||
|         self._meta_info[BEST_NAME_KEY] = str(xname) | ||||
|  | ||||
|     def save_best(self, score): | ||||
|         if BEST_DIR_KEY not in self._meta_info: | ||||
|             tempdir = tempfile.mkdtemp("-xlayers") | ||||
|             self._meta_info[BEST_DIR_KEY] = tempdir | ||||
|         if BEST_SCORE_KEY not in self._meta_info: | ||||
|             self._meta_info[BEST_SCORE_KEY] = None | ||||
|         best_score = self._meta_info[BEST_SCORE_KEY] | ||||
|         if best_score is None or best_score <= score: | ||||
|             best_save_name = self._meta_info.get( | ||||
|                 BEST_NAME_KEY, "best-{:}.pth".format(self.__class__.__name__) | ||||
|             ) | ||||
|  | ||||
|             best_save_path = os.path.join(self._meta_info[BEST_DIR_KEY], best_save_name) | ||||
|             self._meta_info[BEST_SCORE_KEY] = score | ||||
|             torch.save(self.state_dict(), best_save_path) | ||||
|             return True, self._meta_info[BEST_SCORE_KEY] | ||||
|         else: | ||||
|             return False, self._meta_info[BEST_SCORE_KEY] | ||||
|  | ||||
|     def load_best(self, best_save_name=None): | ||||
|         if BEST_DIR_KEY not in self._meta_info: | ||||
|             raise ValueError("Please set BEST_DIR_KEY at first") | ||||
|         if best_save_name is None: | ||||
|             best_save_name = self._meta_info.get( | ||||
|                 BEST_NAME_KEY, "best-{:}.pth".format(self.__class__.__name__) | ||||
|             ) | ||||
|         best_save_path = os.path.join(self._meta_info[BEST_DIR_KEY], best_save_name) | ||||
|         state_dict = torch.load(best_save_path) | ||||
|         self.load_state_dict(state_dict) | ||||
|  | ||||
|     def has_best(self, best_name=None): | ||||
|         if BEST_DIR_KEY not in self._meta_info: | ||||
|             raise ValueError("Please set BEST_DIR_KEY at first") | ||||
|         if best_name is None: | ||||
|             best_save_name = self._meta_info.get( | ||||
|                 BEST_NAME_KEY, "best-{:}.pth".format(self.__class__.__name__) | ||||
|             ) | ||||
|         else: | ||||
|             best_save_name = best_name | ||||
|         best_save_path = os.path.join(self._meta_info[BEST_DIR_KEY], best_save_name) | ||||
|         return os.path.exists(best_save_path) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @property | ||||
|     def super_run_type(self): | ||||
|         return self._super_run_type | ||||
|  | ||||
|     @property | ||||
|     def abstract_child(self): | ||||
|         return self._abstract_child | ||||
|  | ||||
|     @property | ||||
|     def verbose(self): | ||||
|         return self._verbose | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def forward_raw(self, *inputs): | ||||
|         """Use the largest candidate for forward. Similar to the original PyTorch model.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def forward_candidate(self, *inputs): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @property | ||||
|     def name_with_id(self): | ||||
|         return "name={:}, id={:}".format(self.__class__.__name__, id(self)) | ||||
|  | ||||
|     def get_shape_str(self, tensors): | ||||
|         if isinstance(tensors, (list, tuple)): | ||||
|             shapes = [self.get_shape_str(tensor) for tensor in tensors] | ||||
|             if len(shapes) == 1: | ||||
|                 return shapes[0] | ||||
|             else: | ||||
|                 return ", ".join(shapes) | ||||
|         elif isinstance(tensors, (torch.Tensor, nn.Parameter)): | ||||
|             return str(tuple(tensors.shape)) | ||||
|         else: | ||||
|             raise TypeError("Invalid input type: {:}.".format(type(tensors))) | ||||
|  | ||||
|     def forward(self, *inputs): | ||||
|         if self.verbose: | ||||
|             print( | ||||
|                 "[{:}] inputs shape: {:}".format( | ||||
|                     self.name_with_id, self.get_shape_str(inputs) | ||||
|                 ) | ||||
|             ) | ||||
|         if self.super_run_type == SuperRunMode.FullModel: | ||||
|             outputs = self.forward_raw(*inputs) | ||||
|         elif self.super_run_type == SuperRunMode.Candidate: | ||||
|             if self._candidate_mode == DISABLE_CANDIDATE: | ||||
|                 raise ValueError("candidate mode is disabled") | ||||
|             outputs = self.forward_candidate(*inputs) | ||||
|         else: | ||||
|             raise ValueError( | ||||
|                 "Unknown Super Model Run Mode: {:}".format(self.super_run_type) | ||||
|             ) | ||||
|         if self.verbose: | ||||
|             print( | ||||
|                 "[{:}] outputs shape: {:}".format( | ||||
|                     self.name_with_id, self.get_shape_str(outputs) | ||||
|                 ) | ||||
|             ) | ||||
|         return outputs | ||||
|  | ||||
|     def forward_with_container(self, inputs, container, prefix=[]): | ||||
|         raise NotImplementedError | ||||
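A toy subclass sketch (illustrative, not part of the committed file) showing how SuperModule.forward dispatches: the default "fullmodel" run type routes to forward_raw, while the "candidate" run type requires enable_candidate() and routes to forward_candidate.

import torch
from xautodl import spaces
from xautodl.xlayers.super_module import SuperModule
from xautodl.xlayers.super_utils import SuperRunMode

class ToyScale(SuperModule):
    """Multiplies the input by a constant; nothing is searchable."""

    def __init__(self, scale=2.0):
        super(ToyScale, self).__init__()
        self._scale = scale

    @property
    def abstract_search_space(self):
        return spaces.VirtualNode(id(self))  # empty search space

    def forward_raw(self, input):
        return input * self._scale

    def forward_candidate(self, input):
        return self.forward_raw(input)

layer = ToyScale()
x = torch.ones(3)
print(layer(x))                                   # default run type -> forward_raw
layer.set_super_run_type(SuperRunMode.Candidate)
layer.enable_candidate()                          # otherwise forward() raises
print(layer(x))                                   # candidate run type -> forward_candidate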
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_norm.py (new file, 224 lines)
							| @@ -0,0 +1,224 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| import math | ||||
| from typing import Optional, Callable | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
|  | ||||
|  | ||||
| class SuperLayerNorm1D(SuperModule): | ||||
|     """Super Layer Norm.""" | ||||
|  | ||||
|     def __init__( | ||||
|         self, dim: IntSpaceType, eps: float = 1e-6, elementwise_affine: bool = True | ||||
|     ) -> None: | ||||
|         super(SuperLayerNorm1D, self).__init__() | ||||
|         self._in_dim = dim | ||||
|         self._eps = eps | ||||
|         self._elementwise_affine = elementwise_affine | ||||
|         if self._elementwise_affine: | ||||
|             self.register_parameter("weight", nn.Parameter(torch.Tensor(self.in_dim))) | ||||
|             self.register_parameter("bias", nn.Parameter(torch.Tensor(self.in_dim))) | ||||
|         else: | ||||
|             self.register_parameter("weight", None) | ||||
|             self.register_parameter("bias", None) | ||||
|         self.reset_parameters() | ||||
|  | ||||
|     @property | ||||
|     def in_dim(self): | ||||
|         return spaces.get_max(self._in_dim) | ||||
|  | ||||
|     @property | ||||
|     def eps(self): | ||||
|         return self._eps | ||||
|  | ||||
|     def reset_parameters(self) -> None: | ||||
|         if self._elementwise_affine: | ||||
|             nn.init.ones_(self.weight) | ||||
|             nn.init.zeros_(self.bias) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         if not spaces.is_determined(self._in_dim): | ||||
|             root_node.append("_in_dim", self._in_dim.abstract(reuse_last=True)) | ||||
|         return root_node | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         if not spaces.is_determined(self._in_dim): | ||||
|             expected_input_dim = self.abstract_child["_in_dim"].value | ||||
|         else: | ||||
|             expected_input_dim = spaces.get_determined_value(self._in_dim) | ||||
|         if input.size(-1) != expected_input_dim: | ||||
|             raise ValueError( | ||||
|                 "Expect the input dim of {:} instead of {:}".format( | ||||
|                     expected_input_dim, input.size(-1) | ||||
|                 ) | ||||
|             ) | ||||
|         if self._elementwise_affine: | ||||
|             weight = self.weight[:expected_input_dim] | ||||
|             bias = self.bias[:expected_input_dim] | ||||
|         else: | ||||
|             weight, bias = None, None | ||||
|         return F.layer_norm(input, (expected_input_dim,), weight, bias, self.eps) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return F.layer_norm(input, (self.in_dim,), self.weight, self.bias, self.eps) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         super_weight_name = ".".join(prefix + ["weight"]) | ||||
|         if container.has(super_weight_name): | ||||
|             weight = container.query(super_weight_name) | ||||
|         else: | ||||
|             weight = None | ||||
|         super_bias_name = ".".join(prefix + ["bias"]) | ||||
|         if container.has(super_bias_name): | ||||
|             bias = container.query(super_bias_name) | ||||
|         else: | ||||
|             bias = None | ||||
|         return F.layer_norm(input, (self.in_dim,), weight, bias, self.eps) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return ( | ||||
|             "shape={in_dim}, eps={eps}, elementwise_affine={elementwise_affine}".format( | ||||
|                 in_dim=self._in_dim, | ||||
|                 eps=self._eps, | ||||
|                 elementwise_affine=self._elementwise_affine, | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class SuperSimpleNorm(SuperModule): | ||||
|     """Super simple normalization.""" | ||||
|  | ||||
|     def __init__(self, mean, std, inplace=False) -> None: | ||||
|         super(SuperSimpleNorm, self).__init__() | ||||
|         self.register_buffer("_mean", torch.tensor(mean, dtype=torch.float)) | ||||
|         self.register_buffer("_std", torch.tensor(std, dtype=torch.float)) | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         if not self._inplace: | ||||
|             tensor = input.clone() | ||||
|         else: | ||||
|             tensor = input | ||||
|         mean = torch.as_tensor(self._mean, dtype=tensor.dtype, device=tensor.device) | ||||
|         std = torch.as_tensor(self._std, dtype=tensor.dtype, device=tensor.device) | ||||
|         if (std == 0).any(): | ||||
|             raise ValueError( | ||||
|                 "std evaluated to zero after conversion to {}, leading to division by zero.".format( | ||||
|                     tensor.dtype | ||||
|                 ) | ||||
|             ) | ||||
|         while mean.ndim < tensor.ndim: | ||||
|             mean, std = torch.unsqueeze(mean, dim=0), torch.unsqueeze(std, dim=0) | ||||
|         return tensor.sub_(mean).div_(std) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "mean={mean}, std={std}, inplace={inplace}".format( | ||||
|             mean=self._mean.item(), std=self._std.item(), inplace=self._inplace | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class SuperSimpleLearnableNorm(SuperModule): | ||||
|     """Super simple normalization.""" | ||||
|  | ||||
|     def __init__(self, mean=0, std=1, eps=1e-6, inplace=False) -> None: | ||||
|         super(SuperSimpleLearnableNorm, self).__init__() | ||||
|         self.register_parameter( | ||||
|             "_mean", nn.Parameter(torch.tensor(mean, dtype=torch.float)) | ||||
|         ) | ||||
|         self.register_parameter( | ||||
|             "_std", nn.Parameter(torch.tensor(std, dtype=torch.float)) | ||||
|         ) | ||||
|         self._eps = eps | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         if not self._inplace: | ||||
|             tensor = input.clone() | ||||
|         else: | ||||
|             tensor = input | ||||
|         mean, std = ( | ||||
|             self._mean.to(tensor.device), | ||||
|             torch.abs(self._std.to(tensor.device)) + self._eps, | ||||
|         ) | ||||
|         if (std == 0).any(): | ||||
|             raise ValueError("std leads to division by zero.") | ||||
|         while mean.ndim < tensor.ndim: | ||||
|             mean, std = torch.unsqueeze(mean, dim=0), torch.unsqueeze(std, dim=0) | ||||
|         return tensor.sub_(mean).div_(std) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         if not self._inplace: | ||||
|             tensor = input.clone() | ||||
|         else: | ||||
|             tensor = input | ||||
|         mean_name = ".".join(prefix + ["_mean"]) | ||||
|         std_name = ".".join(prefix + ["_std"]) | ||||
|         mean, std = ( | ||||
|             container.query(mean_name).to(tensor.device), | ||||
|             torch.abs(container.query(std_name).to(tensor.device)) + self._eps, | ||||
|         ) | ||||
|         while mean.ndim < tensor.ndim: | ||||
|             mean, std = torch.unsqueeze(mean, dim=0), torch.unsqueeze(std, dim=0) | ||||
|         return tensor.sub_(mean).div_(std) | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "mean={mean}, std={std}, inplace={inplace}".format( | ||||
|             mean=self._mean.item(), std=self._std.item(), inplace=self._inplace | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class SuperIdentity(SuperModule): | ||||
|     """Super identity mapping layer.""" | ||||
|  | ||||
|     def __init__(self, inplace=False, **kwargs) -> None: | ||||
|         super(SuperIdentity, self).__init__() | ||||
|         self._inplace = inplace | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         return spaces.VirtualNode(id(self)) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         # check inputs -> | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         if not self._inplace: | ||||
|             tensor = input.clone() | ||||
|         else: | ||||
|             tensor = input | ||||
|         return tensor | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "inplace={inplace}".format(inplace=self._inplace) | ||||
|  | ||||
|     def forward_with_container(self, input, container, prefix=[]): | ||||
|         return self.forward_raw(input) | ||||
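A sketch (illustrative, not part of the committed file) of SuperLayerNorm1D with a searchable normalized dimension. spaces.Categorical and VirtualNode.random(reuse_last=True) are assumed from the spaces package, which is not shown in this diff.

import torch
from xautodl import spaces
from xautodl.xlayers.super_norm import SuperLayerNorm1D
from xautodl.xlayers.super_utils import SuperRunMode

norm = SuperLayerNorm1D(spaces.Categorical(32, 64))   # weight/bias sized for the max, 64
x = torch.rand(4, 10, 64)
print(norm(x).shape)                                  # full model normalizes all 64 dims

child = norm.abstract_search_space.random(reuse_last=True)  # assumed spaces API
norm.set_super_run_type(SuperRunMode.Candidate)
norm.enable_candidate()
norm.apply_candidate(child)
# In candidate mode the input's last dim must equal the sampled "_in_dim";
# only the first "_in_dim" entries of weight and bias are used (sliced views).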
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_positional_embedding.py (new file, 105 lines)
							| @@ -0,0 +1,105 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.02 # | ||||
| ##################################################### | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import math | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
|  | ||||
|  | ||||
| class SuperDynamicPositionE(SuperModule): | ||||
|     """Applies a positional encoding to the input positions.""" | ||||
|  | ||||
|     def __init__(self, dimension: int, scale: float = 1.0) -> None: | ||||
|         super(SuperDynamicPositionE, self).__init__() | ||||
|  | ||||
|         self._scale = scale | ||||
|         self._dimension = dimension | ||||
|         # fixed sinusoidal frequencies, stored as a non-trainable buffer | ||||
|         self.register_buffer( | ||||
|             "_div_term", | ||||
|             torch.exp( | ||||
|                 torch.arange(0, dimension, 2).float() * (-math.log(10000.0) / dimension) | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         return root_node | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         positions = torch.unsqueeze(input * self._scale, dim=-1) | ||||
|         divisions = torch.reshape( | ||||
|             self._div_term, [1] * input.ndim + [self._div_term.numel()] | ||||
|         ) | ||||
|         values = positions / divisions | ||||
|         embeds = torch.cat((torch.sin(values), torch.cos(values)), dim=-1) | ||||
|         return embeds | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         return "scale={:}, dim={:}".format(self._scale, self._dimension) | ||||
|  | ||||
|  | ||||
| class SuperPositionalEncoder(SuperModule): | ||||
|     """Attention Is All You Need: https://arxiv.org/pdf/1706.03762.pdf | ||||
|     https://github.com/pytorch/examples/blob/master/word_language_model/model.py#L65 | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, d_model: IntSpaceType, max_seq_len: int, dropout: float = 0.1): | ||||
|         super(SuperPositionalEncoder, self).__init__() | ||||
|         self._d_model = d_model | ||||
|         # create constant 'pe' matrix with values dependent on | ||||
|         # pos and i | ||||
|         self.dropout = nn.Dropout(p=dropout) | ||||
|         self.register_buffer("pe", self.create_pos_embed(max_seq_len, self.d_model)) | ||||
|  | ||||
|     @property | ||||
|     def d_model(self): | ||||
|         return spaces.get_max(self._d_model) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         if not spaces.is_determined(self._d_model): | ||||
|             root_node.append("_d_model", self._d_model.abstract(reuse_last=True)) | ||||
|         return root_node | ||||
|  | ||||
|     def create_pos_embed(self, max_seq_len, d_model): | ||||
|         pe = torch.zeros(max_seq_len, d_model) | ||||
|         for pos in range(max_seq_len): | ||||
|             for i in range(0, d_model): | ||||
|                 div = 10000 ** ((i // 2) * 2 / d_model) | ||||
|                 value = pos / div | ||||
|                 if i % 2 == 0: | ||||
|                     pe[pos, i] = math.sin(value) | ||||
|                 else: | ||||
|                     pe[pos, i] = math.cos(value) | ||||
|         return pe.unsqueeze(0) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         batch, seq, fdim = input.shape[:3] | ||||
|         embeddings = self.pe[:, :seq] | ||||
|         if not spaces.is_determined(self._d_model): | ||||
|             expected_d_model = self.abstract_child["_d_model"].value | ||||
|         else: | ||||
|             expected_d_model = spaces.get_determined_value(self._d_model) | ||||
|         assert fdim == expected_d_model, "{:} vs {:}".format(fdim, expected_d_model) | ||||
|  | ||||
|         embeddings = torch.nn.functional.interpolate( | ||||
|             embeddings, size=(expected_d_model), mode="linear", align_corners=True | ||||
|         ) | ||||
|         outs = self.dropout(input + embeddings) | ||||
|         return outs | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         batch, seq, fdim = input.shape[:3] | ||||
|         embeddings = self.pe[:, :seq] | ||||
|         outs = self.dropout(input + embeddings) | ||||
|         return outs | ||||
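The double loop in create_pos_embed above fills the standard sinusoidal table, PE[pos, 2i] = sin(pos / 10000^(2i/d)) and PE[pos, 2i+1] = cos(pos / 10000^(2i/d)). A vectorized equivalent (a sketch, assuming an even d_model) is given below for reference.

import torch

def sinusoid_table(max_seq_len: int, d_model: int) -> torch.Tensor:
    # reproduces SuperPositionalEncoder.create_pos_embed for even d_model
    pe = torch.zeros(max_seq_len, d_model)
    position = torch.arange(max_seq_len, dtype=torch.float).unsqueeze(1)
    div = torch.pow(10000.0, torch.arange(0, d_model, 2).float() / d_model)
    pe[:, 0::2] = torch.sin(position / div)   # even indices -> sine
    pe[:, 1::2] = torch.cos(position / div)   # odd indices  -> cosine
    return pe.unsqueeze(0)                    # (1, max_seq_len, d_model), like the 'pe' buffer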
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_rearrange.py (new file, 187 lines)
							| @@ -0,0 +1,187 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ############################################################# | ||||
| # Borrow the idea of https://github.com/arogozhnikov/einops # | ||||
| ############################################################# | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| import math | ||||
| import numpy as np | ||||
| import itertools | ||||
| import functools | ||||
| from collections import OrderedDict | ||||
| from typing import Optional, Callable | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .misc_utils import ParsedExpression, AnonymousAxis | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
|  | ||||
|  | ||||
| class SuperReArrange(SuperModule): | ||||
|     """Applies the rearrange operation.""" | ||||
|  | ||||
|     def __init__(self, pattern, **axes_lengths): | ||||
|         super(SuperReArrange, self).__init__() | ||||
|  | ||||
|         self._pattern = pattern | ||||
|         self._axes_lengths = axes_lengths | ||||
|         axes_lengths = tuple(sorted(self._axes_lengths.items())) | ||||
|         # Perform initial parsing of pattern and provided supplementary info | ||||
|         # axes_lengths is a tuple of tuples (axis_name, axis_length) | ||||
|         left, right = pattern.split("->") | ||||
|         left = ParsedExpression(left) | ||||
|         right = ParsedExpression(right) | ||||
|         difference = set.symmetric_difference(left.identifiers, right.identifiers) | ||||
|         if difference: | ||||
|             raise ValueError( | ||||
|                 "Identifiers only on one side of expression (should be on both): {}".format( | ||||
|                     difference | ||||
|                 ) | ||||
|             ) | ||||
|  | ||||
|         # parsing all dimensions to find out lengths | ||||
|         axis_name2known_length = OrderedDict() | ||||
|         for composite_axis in left.composition: | ||||
|             for axis_name in composite_axis: | ||||
|                 if isinstance(axis_name, AnonymousAxis): | ||||
|                     axis_name2known_length[axis_name] = axis_name.value | ||||
|                 else: | ||||
|                     axis_name2known_length[axis_name] = None | ||||
|         for axis_name in right.identifiers: | ||||
|             if axis_name not in axis_name2known_length: | ||||
|                 if isinstance(axis_name, AnonymousAxis): | ||||
|                     axis_name2known_length[axis_name] = axis_name.value | ||||
|                 else: | ||||
|                     axis_name2known_length[axis_name] = None | ||||
|  | ||||
|         axis_name2position = { | ||||
|             name: position for position, name in enumerate(axis_name2known_length) | ||||
|         } | ||||
|         for elementary_axis, axis_length in axes_lengths: | ||||
|             if not ParsedExpression.check_axis_name(elementary_axis): | ||||
|                 raise ValueError("Invalid name for an axis", elementary_axis) | ||||
|             if elementary_axis not in axis_name2known_length: | ||||
|                 raise ValueError( | ||||
|                     "Axis {} is not used in transform".format(elementary_axis) | ||||
|                 ) | ||||
|             axis_name2known_length[elementary_axis] = axis_length | ||||
|  | ||||
|         input_composite_axes = [] | ||||
|         # some of the shapes will be inferred later; all information is prepared for faster inference | ||||
|         for composite_axis in left.composition: | ||||
|             known = { | ||||
|                 axis | ||||
|                 for axis in composite_axis | ||||
|                 if axis_name2known_length[axis] is not None | ||||
|             } | ||||
|             unknown = { | ||||
|                 axis for axis in composite_axis if axis_name2known_length[axis] is None | ||||
|             } | ||||
|             if len(unknown) > 1: | ||||
|                 raise ValueError("Could not infer sizes for {}".format(unknown)) | ||||
|             assert len(unknown) + len(known) == len(composite_axis) | ||||
|             input_composite_axes.append( | ||||
|                 ( | ||||
|                     [axis_name2position[axis] for axis in known], | ||||
|                     [axis_name2position[axis] for axis in unknown], | ||||
|                 ) | ||||
|             ) | ||||
|  | ||||
|         axis_position_after_reduction = {} | ||||
|         for axis_name in itertools.chain(*left.composition): | ||||
|             if axis_name in right.identifiers: | ||||
|                 axis_position_after_reduction[axis_name] = len( | ||||
|                     axis_position_after_reduction | ||||
|                 ) | ||||
|  | ||||
|         result_axes_grouping = [] | ||||
|         for composite_axis in right.composition: | ||||
|             result_axes_grouping.append( | ||||
|                 [axis_name2position[axis] for axis in composite_axis] | ||||
|             ) | ||||
|  | ||||
|         ordered_axis_right = list(itertools.chain(*right.composition)) | ||||
|         axes_permutation = tuple( | ||||
|             axis_position_after_reduction[axis] | ||||
|             for axis in ordered_axis_right | ||||
|             if axis in left.identifiers | ||||
|         ) | ||||
|         # | ||||
|         self.input_composite_axes = input_composite_axes | ||||
|         self.output_composite_axes = result_axes_grouping | ||||
|         self.elementary_axes_lengths = list(axis_name2known_length.values()) | ||||
|         self.axes_permutation = axes_permutation | ||||
|  | ||||
|     @functools.lru_cache(maxsize=1024) | ||||
|     def reconstruct_from_shape(self, shape): | ||||
|         if len(shape) != len(self.input_composite_axes): | ||||
|             raise ValueError( | ||||
|                 "Expected {} dimensions, got {}".format( | ||||
|                     len(self.input_composite_axes), len(shape) | ||||
|                 ) | ||||
|             ) | ||||
|         axes_lengths = list(self.elementary_axes_lengths) | ||||
|         for input_axis, (known_axes, unknown_axes) in enumerate( | ||||
|             self.input_composite_axes | ||||
|         ): | ||||
|             length = shape[input_axis] | ||||
|             known_product = 1 | ||||
|             for axis in known_axes: | ||||
|                 known_product *= axes_lengths[axis] | ||||
|             if len(unknown_axes) == 0: | ||||
|                 if ( | ||||
|                     isinstance(length, int) | ||||
|                     and isinstance(known_product, int) | ||||
|                     and length != known_product | ||||
|                 ): | ||||
|                     raise ValueError( | ||||
|                         "Shape mismatch, {} != {}".format(length, known_product) | ||||
|                     ) | ||||
|             else: | ||||
|                 if ( | ||||
|                     isinstance(length, int) | ||||
|                     and isinstance(known_product, int) | ||||
|                     and length % known_product != 0 | ||||
|                 ): | ||||
|                     raise ValueError( | ||||
|                         "Shape mismatch, can't divide axis of length {} in chunks of {}".format( | ||||
|                             length, known_product | ||||
|                         ) | ||||
|                     ) | ||||
|  | ||||
|                 (unknown_axis,) = unknown_axes | ||||
|                 axes_lengths[unknown_axis] = length // known_product | ||||
|         # at this point all axes_lengths are computed (either have values or variables, but not Nones) | ||||
|         final_shape = [] | ||||
|         for output_axis, grouping in enumerate(self.output_composite_axes): | ||||
|             lengths = [axes_lengths[elementary_axis] for elementary_axis in grouping] | ||||
|             final_shape.append(int(np.prod(lengths))) | ||||
|         axes_reordering = self.axes_permutation | ||||
|         return axes_lengths, axes_reordering, final_shape | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         return root_node | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(input) | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         init_shape, axes_reordering, final_shape = self.reconstruct_from_shape( | ||||
|             tuple(input.shape) | ||||
|         ) | ||||
|         tensor = torch.reshape(input, init_shape) | ||||
|         tensor = tensor.permute(axes_reordering) | ||||
|         tensor = torch.reshape(tensor, final_shape) | ||||
|         return tensor | ||||
|  | ||||
|     def extra_repr(self) -> str: | ||||
|         params = repr(self._pattern) | ||||
|         for axis, length in self._axes_lengths.items(): | ||||
|             params += ", {}={}".format(axis, length) | ||||
|         return "{:}".format(params) | ||||
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_trade_stem.py (new file, 58 lines)
							| @@ -0,0 +1,58 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import math | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_linear import SuperLinear | ||||
| from .super_module import SuperModule | ||||
| from .super_module import IntSpaceType | ||||
|  | ||||
|  | ||||
| class SuperAlphaEBDv1(SuperModule): | ||||
|     """A simple layer to convert the raw trading data from 1-D to 2-D data and apply an FC layer.""" | ||||
|  | ||||
|     def __init__(self, d_feat: int, embed_dim: IntSpaceType): | ||||
|         super(SuperAlphaEBDv1, self).__init__() | ||||
|         self._d_feat = d_feat | ||||
|         self._embed_dim = embed_dim | ||||
|         self.proj = SuperLinear(d_feat, embed_dim) | ||||
|  | ||||
|     @property | ||||
|     def embed_dim(self): | ||||
|         return spaces.get_max(self._embed_dim) | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         space = self.proj.abstract_search_space | ||||
|         if not spaces.is_determined(space): | ||||
|             root_node.append("proj", space) | ||||
|         if not spaces.is_determined(self._embed_dim): | ||||
|             root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True)) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperAlphaEBDv1, self).apply_candidate(abstract_child) | ||||
|         if "proj" in abstract_child: | ||||
|             self.proj.apply_candidate(abstract_child["proj"]) | ||||
|  | ||||
|     def forward_candidate(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         x = input.reshape(len(input), self._d_feat, -1)  # [N, F*T] -> [N, F, T] | ||||
|         x = x.permute(0, 2, 1)  # [N, F, T] -> [N, T, F] | ||||
|         if not spaces.is_determined(self._embed_dim): | ||||
|             embed_dim = self.abstract_child["_embed_dim"].value | ||||
|         else: | ||||
|             embed_dim = spaces.get_determined_value(self._embed_dim) | ||||
|         out = self.proj(x) * math.sqrt(embed_dim) | ||||
|         return out | ||||
|  | ||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||
|         x = input.reshape(len(input), self._d_feat, -1)  # [N, F*T] -> [N, F, T] | ||||
|         x = x.permute(0, 2, 1)  # [N, F, T] -> [N, T, F] | ||||
|         out = self.proj(x) * math.sqrt(self.embed_dim) | ||||
|         return out | ||||
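Usage sketch (illustrative, not part of the committed file): the stem reshapes flat trading features [N, F*T] into [N, T, F] and projects every time step to the embedding dimension, scaled by sqrt(embed_dim) as in the Transformer. Plain ints are assumed to be accepted wherever IntSpaceType is annotated, as the type alias suggests.

import torch
from xautodl.xlayers.super_trade_stem import SuperAlphaEBDv1

stem = SuperAlphaEBDv1(d_feat=6, embed_dim=32)
x = torch.rand(8, 6 * 60)      # 8 samples, 6 features over 60 time steps, flattened
print(stem(x).shape)           # torch.Size([8, 60, 32])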
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_transformer.py (new file, 127 lines)
							| @@ -0,0 +1,127 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
| import math | ||||
| from typing import Optional, Callable | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
| from xautodl import spaces | ||||
| from .super_module import IntSpaceType | ||||
| from .super_module import BoolSpaceType | ||||
| from .super_module import LayerOrder | ||||
| from .super_module import SuperModule | ||||
| from .super_linear import SuperMLPv2 | ||||
| from .super_norm import SuperLayerNorm1D | ||||
| from .super_attention import SuperSelfAttention | ||||
|  | ||||
|  | ||||
| class SuperTransformerEncoderLayer(SuperModule): | ||||
|     """TransformerEncoderLayer is made up of self-attn and feedforward network. | ||||
|     This is a super model for TransformerEncoderLayer that can support search for the transformer encoder layer. | ||||
|  | ||||
|     Reference: | ||||
|       - Paper: Attention Is All You Need, NeurIPS 2017 | ||||
|       - PyTorch Implementation: https://pytorch.org/docs/stable/_modules/torch/nn/modules/transformer.html#TransformerEncoderLayer | ||||
|  | ||||
|     Details: | ||||
|       the original post-norm version: MHA -> residual -> norm -> MLP -> residual -> norm | ||||
|       the pre-norm version: norm -> MHA -> residual -> norm -> MLP -> residual | ||||
|     """ | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         d_model: IntSpaceType, | ||||
|         num_heads: IntSpaceType, | ||||
|         qkv_bias: BoolSpaceType = False, | ||||
|         mlp_hidden_multiplier: IntSpaceType = 4, | ||||
|         dropout: Optional[float] = None, | ||||
|         att_dropout: Optional[float] = None, | ||||
|         norm_affine: bool = True, | ||||
|         act_layer: Callable[[], nn.Module] = nn.GELU, | ||||
|         order: LayerOrder = LayerOrder.PreNorm, | ||||
|         use_mask: bool = False, | ||||
|     ): | ||||
|         super(SuperTransformerEncoderLayer, self).__init__() | ||||
|         mha = SuperSelfAttention( | ||||
|             d_model, | ||||
|             d_model, | ||||
|             num_heads=num_heads, | ||||
|             qkv_bias=qkv_bias, | ||||
|             attn_drop=att_dropout, | ||||
|             proj_drop=None, | ||||
|             use_mask=use_mask, | ||||
|         ) | ||||
|         mlp = SuperMLPv2( | ||||
|             d_model, | ||||
|             hidden_multiplier=mlp_hidden_multiplier, | ||||
|             out_features=d_model, | ||||
|             act_layer=act_layer, | ||||
|             drop=dropout, | ||||
|         ) | ||||
|         if order is LayerOrder.PreNorm: | ||||
|             self.norm1 = SuperLayerNorm1D(d_model, elementwise_affine=norm_affine) | ||||
|             self.mha = mha | ||||
|             self.drop = nn.Dropout(dropout or 0.0) | ||||
|             self.norm2 = SuperLayerNorm1D(d_model, elementwise_affine=norm_affine) | ||||
|             self.mlp = mlp | ||||
|         elif order is LayerOrder.PostNorm: | ||||
|             self.mha = mha | ||||
|             self.drop1 = nn.Dropout(dropout or 0.0) | ||||
|             self.norm1 = SuperLayerNorm1D(d_model, elementwise_affine=norm_affine) | ||||
|             self.mlp = mlp | ||||
|             self.drop2 = nn.Dropout(dropout or 0.0) | ||||
|             self.norm2 = SuperLayerNorm1D(d_model, elementwise_affine=norm_affine) | ||||
|         else: | ||||
|             raise ValueError("Unknown order: {:}".format(order)) | ||||
|         self._order = order | ||||
|  | ||||
|     @property | ||||
|     def abstract_search_space(self): | ||||
|         root_node = spaces.VirtualNode(id(self)) | ||||
|         xdict = dict( | ||||
|             mha=self.mha.abstract_search_space, | ||||
|             norm1=self.norm1.abstract_search_space, | ||||
|             mlp=self.mlp.abstract_search_space, | ||||
|             norm2=self.norm2.abstract_search_space, | ||||
|         ) | ||||
|         for key, space in xdict.items(): | ||||
|             if not spaces.is_determined(space): | ||||
|                 root_node.append(key, space) | ||||
|         return root_node | ||||
|  | ||||
|     def apply_candidate(self, abstract_child: spaces.VirtualNode): | ||||
|         super(SuperTransformerEncoderLayer, self).apply_candidate(abstract_child) | ||||
|         valid_keys = ["mha", "norm1", "mlp", "norm2"] | ||||
|         for key in valid_keys: | ||||
|             if key in abstract_child: | ||||
|                 getattr(self, key).apply_candidate(abstract_child[key]) | ||||
|  | ||||
|     def forward_candidate(self, inputs: torch.Tensor) -> torch.Tensor: | ||||
|         return self.forward_raw(inputs) | ||||
|  | ||||
|     def forward_raw(self, inputs: torch.Tensor) -> torch.Tensor: | ||||
|         if self._order is LayerOrder.PreNorm: | ||||
|             # https://github.com/google-research/vision_transformer/blob/master/vit_jax/models.py#L135 | ||||
|             x = self.norm1(inputs) | ||||
|             x = self.mha(x) | ||||
|             x = self.drop(x) | ||||
|             x = x + inputs | ||||
|             # feed-forward layer -- MLP | ||||
|             y = self.norm2(x) | ||||
|             outs = x + self.mlp(y) | ||||
|         elif self._order is LayerOrder.PostNorm: | ||||
|             # https://pytorch.org/docs/stable/_modules/torch/nn/modules/transformer.html#TransformerEncoder | ||||
|             # multi-head attention | ||||
|             x = self.mha(inputs) | ||||
|             x = inputs + self.drop1(x) | ||||
|             x = self.norm1(x) | ||||
|             # feed-forward layer -- MLP | ||||
|             y = self.mlp(x) | ||||
|             y = x + self.drop2(y) | ||||
|             outs = self.norm2(y) | ||||
|         else: | ||||
|             raise ValueError("Unknown order: {:}".format(self._order)) | ||||
|         return outs | ||||
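A minimal sketch (illustrative, not part of the committed file) of the pre-norm encoder layer with fixed, non-searchable dimensions. SuperSelfAttention comes from super_attention.py, which is not shown in this part of the diff, so its exact behavior is assumed here.

import torch
from xautodl.xlayers.super_transformer import SuperTransformerEncoderLayer

layer = SuperTransformerEncoderLayer(d_model=64, num_heads=4, dropout=0.1)
x = torch.rand(2, 16, 64)      # (batch, sequence, d_model)
print(layer(x).shape)          # torch.Size([2, 16, 64]); pre-norm order by default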
							
								
								
									
AutoDL-Projects/xautodl/xlayers/super_utils.py (new file, 222 lines)
							| @@ -0,0 +1,222 @@ | ||||
| ##################################################### | ||||
| # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.03 # | ||||
| ##################################################### | ||||
|  | ||||
| import abc | ||||
| import warnings | ||||
| from typing import Optional, Union, Callable | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| from enum import Enum | ||||
|  | ||||
| from xautodl import spaces | ||||
|  | ||||
| IntSpaceType = Union[int, spaces.Integer, spaces.Categorical] | ||||
| BoolSpaceType = Union[bool, spaces.Categorical] | ||||
|  | ||||
|  | ||||
| class LayerOrder(Enum): | ||||
|     """This class defines the enumerations for order of operation in a residual or normalization-based layer.""" | ||||
|  | ||||
|     PreNorm = "pre-norm" | ||||
|     PostNorm = "post-norm" | ||||
|  | ||||
|  | ||||
| class SuperRunMode(Enum): | ||||
|     """This class defines the enumerations for Super Model Running Mode.""" | ||||
|  | ||||
|     FullModel = "fullmodel" | ||||
|     Candidate = "candidate" | ||||
|     Default = "fullmodel" | ||||
|  | ||||
|  | ||||
| class ShapeContainer: | ||||
|     """A class to maintain the shape of each weight tensor for a model.""" | ||||
|  | ||||
|     def __init__(self): | ||||
|         self._names = [] | ||||
|         self._shapes = [] | ||||
|         self._name2index = dict() | ||||
|         self._param_or_buffers = [] | ||||
|  | ||||
|     @property | ||||
|     def shapes(self): | ||||
|         return self._shapes | ||||
|  | ||||
|     def __getitem__(self, index): | ||||
|         return self._shapes[index] | ||||
|  | ||||
|     def translate(self, tensors, all_none_match=True): | ||||
|         result = TensorContainer() | ||||
|         for index, name in enumerate(self._names): | ||||
|             cur_num = tensors[index].numel() | ||||
|             expected_num = self._shapes[index].numel() | ||||
|             if cur_num < expected_num or ( | ||||
|                 cur_num > expected_num and not all_none_match | ||||
|             ): | ||||
|                 raise ValueError("Invalid {:} vs {:}".format(cur_num, expected_num)) | ||||
|             cur_tensor = tensors[index].view(-1)[:expected_num] | ||||
|             new_tensor = torch.reshape(cur_tensor, self._shapes[index]) | ||||
|             result.append(name, new_tensor, self._param_or_buffers[index]) | ||||
|         return result | ||||
|  | ||||
|     def append(self, name, shape, param_or_buffer): | ||||
|         if not isinstance(shape, torch.Size): | ||||
|             raise TypeError( | ||||
|                 "The input tensor must be torch.Size instead of {:}".format(type(shape)) | ||||
|             ) | ||||
|         self._names.append(name) | ||||
|         self._shapes.append(shape) | ||||
|         self._param_or_buffers.append(param_or_buffer) | ||||
|         assert name not in self._name2index, "The [{:}] has already been added.".format( | ||||
|             name | ||||
|         ) | ||||
|         self._name2index[name] = len(self._names) - 1 | ||||
|  | ||||
|     def query(self, name): | ||||
|         if not self.has(name): | ||||
|             raise ValueError( | ||||
|                 "The {:} is not in {:}".format(name, list(self._name2index.keys())) | ||||
|             ) | ||||
|         index = self._name2index[name] | ||||
|         return self._shapes[index] | ||||
|  | ||||
|     def has(self, name): | ||||
|         return name in self._name2index | ||||
|  | ||||
|     def has_prefix(self, prefix): | ||||
|         for name, idx in self._name2index.items(): | ||||
|             if name.startswith(prefix): | ||||
|                 return name | ||||
|         return False | ||||
|  | ||||
|     def numel(self, index=None): | ||||
|         if index is None: | ||||
|             shapes = self._shapes | ||||
|         else: | ||||
|             shapes = [self._shapes[index]] | ||||
|         total = 0 | ||||
|         for shape in shapes: | ||||
|             total += shape.numel() | ||||
|         return total | ||||
|  | ||||
|     def __len__(self): | ||||
|         return len(self._names) | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return "{name}({num} tensors)".format( | ||||
|             name=self.__class__.__name__, num=len(self) | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class TensorContainer: | ||||
|     """A class to maintain both parameters and buffers for a model.""" | ||||
|  | ||||
|     def __init__(self): | ||||
|         self._names = [] | ||||
|         self._tensors = [] | ||||
|         self._param_or_buffers = [] | ||||
|         self._name2index = dict() | ||||
|  | ||||
|     def additive(self, tensors): | ||||
|         """Return a new container whose values are the element-wise sums of the stored tensors and `tensors`.""" | ||||
|         result = TensorContainer() | ||||
|         for index, name in enumerate(self._names): | ||||
|             new_tensor = self._tensors[index] + tensors[index] | ||||
|             result.append(name, new_tensor, self._param_or_buffers[index]) | ||||
|         return result | ||||
|  | ||||
|     def create_container(self, tensors): | ||||
|         """Return a new container with the same names and flags but with `tensors` as the values.""" | ||||
|         result = TensorContainer() | ||||
|         for index, name in enumerate(self._names): | ||||
|             new_tensor = tensors[index] | ||||
|             result.append(name, new_tensor, self._param_or_buffers[index]) | ||||
|         return result | ||||
|  | ||||
|     def no_grad_clone(self): | ||||
|         result = TensorContainer() | ||||
|         with torch.no_grad(): | ||||
|             for index, name in enumerate(self._names): | ||||
|                 result.append( | ||||
|                     name, self._tensors[index].clone(), self._param_or_buffers[index] | ||||
|                 ) | ||||
|         return result | ||||
|  | ||||
|     def to_shape_container(self): | ||||
|         result = ShapeContainer() | ||||
|         for index, name in enumerate(self._names): | ||||
|             result.append( | ||||
|                 name, self._tensors[index].shape, self._param_or_buffers[index] | ||||
|             ) | ||||
|         return result | ||||
|  | ||||
|     def requires_grad_(self, requires_grad=True): | ||||
|         for tensor in self._tensors: | ||||
|             tensor.requires_grad_(requires_grad) | ||||
|  | ||||
|     def parameters(self): | ||||
|         # Note: returns every stored tensor, including buffers, not only trainable parameters. | ||||
|         return self._tensors | ||||
|  | ||||
|     @property | ||||
|     def tensors(self): | ||||
|         return self._tensors | ||||
|  | ||||
|     def flatten(self, tensors=None): | ||||
|         """Concatenate the given (or stored) tensors into a single 1-D tensor.""" | ||||
|         if tensors is None: | ||||
|             tensors = self._tensors | ||||
|         tensors = [tensor.view(-1) for tensor in tensors] | ||||
|         return torch.cat(tensors) | ||||
|  | ||||
|     def unflatten(self, tensor): | ||||
|         """Split a flat 1-D tensor back into a list of tensors with the stored shapes.""" | ||||
|         tensors, s = [], 0 | ||||
|         for raw_tensor in self._tensors: | ||||
|             length = raw_tensor.numel() | ||||
|             x = torch.reshape(tensor[s : s + length], shape=raw_tensor.shape) | ||||
|             tensors.append(x) | ||||
|             s += length | ||||
|         return tensors | ||||
|  | ||||
|     def append(self, name, tensor, param_or_buffer): | ||||
|         if not isinstance(tensor, torch.Tensor): | ||||
|             raise TypeError( | ||||
|                 "The input tensor must be a torch.Tensor instead of {:}".format( | ||||
|                     type(tensor) | ||||
|                 ) | ||||
|             ) | ||||
|         # Check for duplicates before mutating, so the container stays consistent on failure. | ||||
|         assert name not in self._name2index, "The name [{:}] has already been added.".format( | ||||
|             name | ||||
|         ) | ||||
|         self._names.append(name) | ||||
|         self._tensors.append(tensor) | ||||
|         self._param_or_buffers.append(param_or_buffer) | ||||
|         self._name2index[name] = len(self._names) - 1 | ||||
|  | ||||
|     def query(self, name): | ||||
|         if not self.has(name): | ||||
|             raise ValueError( | ||||
|                 "The name {:} is not in {:}".format(name, list(self._name2index.keys())) | ||||
|             ) | ||||
|         index = self._name2index[name] | ||||
|         return self._tensors[index] | ||||
|  | ||||
|     def has(self, name): | ||||
|         return name in self._name2index | ||||
|  | ||||
|     def has_prefix(self, prefix): | ||||
|         """Return the first stored name that starts with `prefix`, or False if none matches.""" | ||||
|         for name, idx in self._name2index.items(): | ||||
|             if name.startswith(prefix): | ||||
|                 return name | ||||
|         return False | ||||
|  | ||||
|     def numel(self): | ||||
|         total = 0 | ||||
|         for tensor in self._tensors: | ||||
|             total += tensor.numel() | ||||
|         return total | ||||
|  | ||||
|     def __len__(self): | ||||
|         return len(self._names) | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return "{name}({num} tensors)".format( | ||||
|             name=self.__class__.__name__, num=len(self) | ||||
|         ) | ||||
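For orientation, here is a minimal usage sketch of how the two containers can be combined: collect a model's named parameters into a TensorContainer, flatten them into one vector, and later rebuild named, correctly shaped tensors via a ShapeContainer. The toy nn.Linear model and the tensor names are illustrative, not taken from this commit.

    import torch
    import torch.nn as nn

    # Any nn.Module works the same way; a single Linear keeps the sketch short.
    model = nn.Linear(4, 2)

    container = TensorContainer()
    for name, param in model.named_parameters():
        container.append(name, param, True)  # True marks the tensor as a parameter

    flat = container.flatten()               # one 1-D tensor holding every value
    shapes = container.to_shape_container()  # names + shapes only, no data

    # Rebuild named, correctly shaped tensors from the flat vector.
    rebuilt = shapes.translate(container.unflatten(flat))
    assert torch.equal(rebuilt.query("weight"), model.weight)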
							
								
								
									
84  AutoDL-Projects/xautodl/xlayers/weight_init.py  Normal file
							| @@ -0,0 +1,84 @@ | ||||
| # Borrowed from https://github.com/rwightman/pytorch-image-models | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import math | ||||
| import warnings | ||||
|  | ||||
| # setup for xlayers | ||||
| from . import super_core | ||||
|  | ||||
|  | ||||
| def _no_grad_trunc_normal_(tensor, mean, std, a, b): | ||||
|     # Cut & paste from PyTorch official master until it's in a few official releases - RW | ||||
|     # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf | ||||
|     def norm_cdf(x): | ||||
|         # Computes standard normal cumulative distribution function | ||||
|         return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 | ||||
|  | ||||
|     if (mean < a - 2 * std) or (mean > b + 2 * std): | ||||
|         warnings.warn( | ||||
|             "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " | ||||
|             "The distribution of values may be incorrect.", | ||||
|             stacklevel=2, | ||||
|         ) | ||||
|  | ||||
|     with torch.no_grad(): | ||||
|         # Values are generated by using a truncated uniform distribution and | ||||
|         # then using the inverse CDF for the normal distribution. | ||||
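|         # Concretely, with Phi(x) = (1 + erf(x / sqrt(2))) / 2 as the standard | ||||
|         # normal CDF, its inverse is Phi^{-1}(p) = sqrt(2) * erfinv(2p - 1). | ||||
|         # Drawing p ~ Uniform[l, u] and mapping it through Phi^{-1}, then scaling | ||||
|         # by std and shifting by mean, yields a normal truncated to [a, b]; the | ||||
|         # steps below fold the (2p - 1) map into uniform_ and the affine map | ||||
|         # into mul_ / add_. | ||||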
|         # Get upper and lower cdf values | ||||
|         l = norm_cdf((a - mean) / std) | ||||
|         u = norm_cdf((b - mean) / std) | ||||
|  | ||||
|         # Uniformly fill tensor with values from [l, u], then translate to | ||||
|         # [2l-1, 2u-1]. | ||||
|         tensor.uniform_(2 * l - 1, 2 * u - 1) | ||||
|  | ||||
|         # Use inverse cdf transform for normal distribution to get truncated | ||||
|         # standard normal | ||||
|         tensor.erfinv_() | ||||
|  | ||||
|         # Transform to proper mean, std | ||||
|         tensor.mul_(std * math.sqrt(2.0)) | ||||
|         tensor.add_(mean) | ||||
|  | ||||
|         # Clamp to ensure it's in the proper range | ||||
|         tensor.clamp_(min=a, max=b) | ||||
|         return tensor | ||||
|  | ||||
|  | ||||
| def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): | ||||
|     # type: (Tensor, float, float, float, float) -> Tensor | ||||
|     r"""Fills the input Tensor with values drawn from a truncated | ||||
|     normal distribution. The values are effectively drawn from the | ||||
|     normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` | ||||
|     with values outside :math:`[a, b]` redrawn until they are within | ||||
|     the bounds. The method used for generating the random values works | ||||
|     best when :math:`a \leq \text{mean} \leq b`. | ||||
|     Args: | ||||
|       tensor: an n-dimensional `torch.Tensor` | ||||
|       mean: the mean of the normal distribution | ||||
|       std: the standard deviation of the normal distribution | ||||
|       a: the minimum cutoff value | ||||
|       b: the maximum cutoff value | ||||
|     Examples: | ||||
|       >>> w = torch.empty(3, 5) | ||||
|       >>> nn.init.trunc_normal_(w) | ||||
|     """ | ||||
|     if isinstance(tensor, list): | ||||
|         return [_no_grad_trunc_normal_(x, mean, std, a, b) for x in tensor] | ||||
|     else: | ||||
|         return _no_grad_trunc_normal_(tensor, mean, std, a, b) | ||||
|  | ||||
|  | ||||
| def init_transformer(m): | ||||
|     if isinstance(m, nn.Linear): | ||||
|         trunc_normal_(m.weight, std=0.02) | ||||
|         if m.bias is not None: | ||||
|             nn.init.constant_(m.bias, 0) | ||||
|     elif isinstance(m, super_core.SuperLinear): | ||||
|         trunc_normal_(m._super_weight, std=0.02) | ||||
|         if m._super_bias is not None: | ||||
|             nn.init.constant_(m._super_bias, 0) | ||||
|     elif isinstance(m, super_core.SuperLayerNorm1D): | ||||
|         nn.init.constant_(m.weight, 1.0) | ||||
|         nn.init.constant_(m.bias, 0) | ||||
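A brief usage sketch: init_transformer follows the per-module callback style typically used with nn.Module.apply, which invokes it on every sub-module of a model. The toy module tree and the import path (which assumes the package layout shown in this commit) are illustrative.

    import torch.nn as nn
    from xautodl.xlayers.weight_init import init_transformer

    # Illustrative module tree; SuperLinear / SuperLayerNorm1D layers are handled analogously.
    model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 8))
    model.apply(init_transformer)  # recursively re-initializes the Linear weights and biases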