Updates
This commit is contained in:
		 Submodule .latent-data/qlib updated: 0a0c6a3185...419629e4d2
									
								
							| @@ -46,7 +46,7 @@ _default_max_depth = 5 | |||||||
| DefaultSearchSpace = dict( | DefaultSearchSpace = dict( | ||||||
|     d_feat=6, |     d_feat=6, | ||||||
|     stem_dim=spaces.Categorical(*_get_list_mul(8, 16)), |     stem_dim=spaces.Categorical(*_get_list_mul(8, 16)), | ||||||
|     embed_dims=_get_mul_specs(_get_list_mul(8, 16), _default_max_depth), |     embed_dim=spaces.Categorical(*_get_list_mul(8, 16)), | ||||||
|     num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth), |     num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth), | ||||||
|     mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth), |     mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth), | ||||||
|     qkv_bias=True, |     qkv_bias=True, | ||||||
| @@ -62,7 +62,7 @@ class SuperTransformer(super_core.SuperModule): | |||||||
|         self, |         self, | ||||||
|         d_feat: int = 6, |         d_feat: int = 6, | ||||||
|         stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"], |         stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"], | ||||||
|         embed_dims: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dims"], |         embed_dim: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dim"], | ||||||
|         num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"], |         num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"], | ||||||
|         mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[ |         mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[ | ||||||
|             "mlp_hidden_multipliers" |             "mlp_hidden_multipliers" | ||||||
| @@ -73,7 +73,7 @@ class SuperTransformer(super_core.SuperModule): | |||||||
|         max_seq_len: int = 65, |         max_seq_len: int = 65, | ||||||
|     ): |     ): | ||||||
|         super(SuperTransformer, self).__init__() |         super(SuperTransformer, self).__init__() | ||||||
|         self._embed_dims = embed_dims |         self._embed_dim = embed_dim | ||||||
|         self._stem_dim = stem_dim |         self._stem_dim = stem_dim | ||||||
|         self._num_heads = num_heads |         self._num_heads = num_heads | ||||||
|         self._mlp_hidden_multipliers = mlp_hidden_multipliers |         self._mlp_hidden_multipliers = mlp_hidden_multipliers | ||||||
| @@ -85,22 +85,15 @@ class SuperTransformer(super_core.SuperModule): | |||||||
|             d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop |             d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop | ||||||
|         ) |         ) | ||||||
|         # build the transformer encode layers -->> check params |         # build the transformer encode layers -->> check params | ||||||
|         _assert_types(embed_dims, (tuple, list)) |  | ||||||
|         _assert_types(num_heads, (tuple, list)) |         _assert_types(num_heads, (tuple, list)) | ||||||
|         _assert_types(mlp_hidden_multipliers, (tuple, list)) |         _assert_types(mlp_hidden_multipliers, (tuple, list)) | ||||||
|         num_layers = len(embed_dims) |         assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format( | ||||||
|         assert ( |             len(num_heads), len(mlp_hidden_multipliers) | ||||||
|             num_layers == len(num_heads) == len(mlp_hidden_multipliers) |  | ||||||
|         ), "{:} vs {:} vs {:}".format( |  | ||||||
|             num_layers, len(num_heads), len(mlp_hidden_multipliers) |  | ||||||
|         ) |         ) | ||||||
|         # build the transformer encode layers -->> backbone |         # build the transformer encode layers -->> backbone | ||||||
|         layers, input_dim = [], stem_dim |         layers = [] | ||||||
|         for embed_dim, num_head, mlp_hidden_multiplier in zip( |         for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers): | ||||||
|             embed_dims, num_heads, mlp_hidden_multipliers |  | ||||||
|         ): |  | ||||||
|             layer = super_core.SuperTransformerEncoderLayer( |             layer = super_core.SuperTransformerEncoderLayer( | ||||||
|                 input_dim, |  | ||||||
|                 embed_dim, |                 embed_dim, | ||||||
|                 num_head, |                 num_head, | ||||||
|                 qkv_bias, |                 qkv_bias, | ||||||
| @@ -108,11 +101,12 @@ class SuperTransformer(super_core.SuperModule): | |||||||
|                 other_drop, |                 other_drop, | ||||||
|             ) |             ) | ||||||
|             layers.append(layer) |             layers.append(layer) | ||||||
|             input_dim = embed_dim |  | ||||||
|         self.backbone = super_core.SuperSequential(*layers) |         self.backbone = super_core.SuperSequential(*layers) | ||||||
|  |  | ||||||
|         # the regression head |         # the regression head | ||||||
|         self.head = super_core.SuperLinear(self._embed_dims[-1], 1) |         self.head = super_core.SuperSequential( | ||||||
|  |             super_core.SuperLayerNorm1D(embed_dim), super_core.SuperLinear(embed_dim, 1) | ||||||
|  |         ) | ||||||
|         trunc_normal_(self.cls_token, std=0.02) |         trunc_normal_(self.cls_token, std=0.02) | ||||||
|         self.apply(self._init_weights) |         self.apply(self._init_weights) | ||||||
|  |  | ||||||
| @@ -123,14 +117,16 @@ class SuperTransformer(super_core.SuperModule): | |||||||
|     @property |     @property | ||||||
|     def abstract_search_space(self): |     def abstract_search_space(self): | ||||||
|         root_node = spaces.VirtualNode(id(self)) |         root_node = spaces.VirtualNode(id(self)) | ||||||
|  |         if not spaces.is_determined(self._stem_dim): | ||||||
|  |             root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True)) | ||||||
|  |         if not spaces.is_determined(self._stem_dim): | ||||||
|  |             root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True)) | ||||||
|         xdict = dict( |         xdict = dict( | ||||||
|             input_embed=self.input_embed.abstract_search_space, |             input_embed=self.input_embed.abstract_search_space, | ||||||
|             pos_embed=self.pos_embed.abstract_search_space, |             pos_embed=self.pos_embed.abstract_search_space, | ||||||
|             backbone=self.backbone.abstract_search_space, |             backbone=self.backbone.abstract_search_space, | ||||||
|             head=self.head.abstract_search_space, |             head=self.head.abstract_search_space, | ||||||
|         ) |         ) | ||||||
|         if not spaces.is_determined(self._stem_dim): |  | ||||||
|             root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True)) |  | ||||||
|         for key, space in xdict.items(): |         for key, space in xdict.items(): | ||||||
|             if not spaces.is_determined(space): |             if not spaces.is_determined(space): | ||||||
|                 root_node.append(key, space) |                 root_node.append(key, space) | ||||||
| @@ -196,7 +192,7 @@ def get_transformer(config): | |||||||
|         model = SuperTransformer( |         model = SuperTransformer( | ||||||
|             d_feat=config.get("d_feat"), |             d_feat=config.get("d_feat"), | ||||||
|             stem_dim=config.get("stem_dim"), |             stem_dim=config.get("stem_dim"), | ||||||
|             embed_dims=config.get("embed_dims"), |             embed_dim=config.get("embed_dim"), | ||||||
|             num_heads=config.get("num_heads"), |             num_heads=config.get("num_heads"), | ||||||
|             mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"), |             mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"), | ||||||
|             qkv_bias=config.get("qkv_bias"), |             qkv_bias=config.get("qkv_bias"), | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ | |||||||
| ##################################################### | ##################################################### | ||||||
| from .super_module import SuperRunMode | from .super_module import SuperRunMode | ||||||
| from .super_module import IntSpaceType | from .super_module import IntSpaceType | ||||||
|  | from .super_module import LayerOrder | ||||||
|  |  | ||||||
| from .super_module import SuperModule | from .super_module import SuperModule | ||||||
| from .super_container import SuperSequential | from .super_container import SuperSequential | ||||||
|   | |||||||
| @@ -37,8 +37,7 @@ class SuperTransformerEncoderLayer(SuperModule): | |||||||
|  |  | ||||||
|     def __init__( |     def __init__( | ||||||
|         self, |         self, | ||||||
|         input_dim: IntSpaceType, |         d_model: IntSpaceType, | ||||||
|         output_dim: IntSpaceType, |  | ||||||
|         num_heads: IntSpaceType, |         num_heads: IntSpaceType, | ||||||
|         qkv_bias: BoolSpaceType = False, |         qkv_bias: BoolSpaceType = False, | ||||||
|         mlp_hidden_multiplier: IntSpaceType = 4, |         mlp_hidden_multiplier: IntSpaceType = 4, | ||||||
| @@ -48,40 +47,37 @@ class SuperTransformerEncoderLayer(SuperModule): | |||||||
|     ): |     ): | ||||||
|         super(SuperTransformerEncoderLayer, self).__init__() |         super(SuperTransformerEncoderLayer, self).__init__() | ||||||
|         mha = SuperAttention( |         mha = SuperAttention( | ||||||
|             input_dim, |             d_model, | ||||||
|             input_dim, |             d_model, | ||||||
|             num_heads=num_heads, |             num_heads=num_heads, | ||||||
|             qkv_bias=qkv_bias, |             qkv_bias=qkv_bias, | ||||||
|             attn_drop=drop, |             attn_drop=drop, | ||||||
|             proj_drop=drop, |             proj_drop=drop, | ||||||
|         ) |         ) | ||||||
|         drop1 = nn.Dropout(drop or 0.0) |  | ||||||
|         norm1 = SuperLayerNorm1D(input_dim) |  | ||||||
|         mlp = SuperMLPv2( |         mlp = SuperMLPv2( | ||||||
|             input_dim, |             d_model, | ||||||
|             hidden_multiplier=mlp_hidden_multiplier, |             hidden_multiplier=mlp_hidden_multiplier, | ||||||
|             out_features=output_dim, |             out_features=d_model, | ||||||
|             act_layer=act_layer, |             act_layer=act_layer, | ||||||
|             drop=drop, |             drop=drop, | ||||||
|         ) |         ) | ||||||
|         drop2 = nn.Dropout(drop or 0.0) |  | ||||||
|         norm2 = SuperLayerNorm1D(output_dim) |  | ||||||
|         if order is LayerOrder.PreNorm: |         if order is LayerOrder.PreNorm: | ||||||
|             self.norm1 = norm1 |             self.norm1 = SuperLayerNorm1D(d_model) | ||||||
|             self.mha = mha |             self.mha = mha | ||||||
|             self.drop1 = drop1 |             self.drop1 = nn.Dropout(drop or 0.0) | ||||||
|             self.norm2 = norm2 |             self.norm2 = SuperLayerNorm1D(d_model) | ||||||
|             self.mlp = mlp |             self.mlp = mlp | ||||||
|             self.drop2 = drop2 |             self.drop2 = nn.Dropout(drop or 0.0) | ||||||
|         elif order is LayerOrder.PostNoem: |         elif order is LayerOrder.PostNorm: | ||||||
|             self.mha = mha |             self.mha = mha | ||||||
|             self.drop1 = drop1 |             self.drop1 = nn.Dropout(drop or 0.0) | ||||||
|             self.norm1 = norm1 |             self.norm1 = SuperLayerNorm1D(d_model) | ||||||
|             self.mlp = mlp |             self.mlp = mlp | ||||||
|             self.drop2 = drop2 |             self.drop2 = nn.Dropout(drop or 0.0) | ||||||
|             self.norm2 = norm2 |             self.norm2 = SuperLayerNorm1D(d_model) | ||||||
|         else: |         else: | ||||||
|             raise ValueError("Unknown order: {:}".format(order)) |             raise ValueError("Unknown order: {:}".format(order)) | ||||||
|  |         self._order = order | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def abstract_search_space(self): |     def abstract_search_space(self): | ||||||
| @@ -108,18 +104,19 @@ class SuperTransformerEncoderLayer(SuperModule): | |||||||
|         return self.forward_raw(input) |         return self.forward_raw(input) | ||||||
|  |  | ||||||
|     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: |     def forward_raw(self, input: torch.Tensor) -> torch.Tensor: | ||||||
|         if order is LayerOrder.PreNorm: |         if self._order is LayerOrder.PreNorm: | ||||||
|             x = self.norm1(input) |             x = self.norm1(input) | ||||||
|             x = x + self.drop1(self.mha(x)) |             x = x + self.drop1(self.mha(x)) | ||||||
|             x = self.norm2(x) |             x = self.norm2(x) | ||||||
|             x = x + self.drop2(self.mlp(x)) |             x = x + self.drop2(self.mlp(x)) | ||||||
|         elif order is LayerOrder.PostNoem: |         elif self._order is LayerOrder.PostNorm: | ||||||
|             # multi-head attention |             # multi-head attention | ||||||
|             x = x + self.drop1(self.mha(input)) |             x = self.mha(input) | ||||||
|  |             x = x + self.drop1(x) | ||||||
|             x = self.norm1(x) |             x = self.norm1(x) | ||||||
|             # feed-forward layer |             # feed-forward layer | ||||||
|             x = x + self.drop2(self.mlp(x)) |             x = x + self.drop2(self.mlp(x)) | ||||||
|             x = self.norm2(x) |             x = self.norm2(x) | ||||||
|         else: |         else: | ||||||
|             raise ValueError("Unknown order: {:}".format(order)) |             raise ValueError("Unknown order: {:}".format(self._order)) | ||||||
|         return x |         return x | ||||||
|   | |||||||
| @@ -53,11 +53,13 @@ class TestSuperAttention(unittest.TestCase): | |||||||
|     @parameterized.expand([[6], [12], [24], [48]]) |     @parameterized.expand([[6], [12], [24], [48]]) | ||||||
|     def test_transformer_encoder(self, input_dim): |     def test_transformer_encoder(self, input_dim): | ||||||
|         output_dim = spaces.Categorical(12, 24, 36) |         output_dim = spaces.Categorical(12, 24, 36) | ||||||
|         model = super_core.SuperTransformerEncoderLayer( |         model = super_core.SuperSequential( | ||||||
|             input_dim, |             super_core.SuperLinear(input_dim, output_dim), | ||||||
|             output_dim=output_dim, |             super_core.SuperTransformerEncoderLayer( | ||||||
|             num_heads=spaces.Categorical(2, 4, 6), |                 output_dim, | ||||||
|             mlp_hidden_multiplier=spaces.Categorical(1, 2, 4), |                 num_heads=spaces.Categorical(2, 4, 6), | ||||||
|  |                 mlp_hidden_multiplier=spaces.Categorical(1, 2, 4), | ||||||
|  |             ), | ||||||
|         ) |         ) | ||||||
|         print(model) |         print(model) | ||||||
|         model.apply_verbose(True) |         model.apply_verbose(True) | ||||||
|   | |||||||
| @@ -36,25 +36,31 @@ def _internal_func(inputs, model): | |||||||
|     return abstract_child, outputs |     return abstract_child, outputs | ||||||
|  |  | ||||||
|  |  | ||||||
| def _create_stel(input_dim, output_dim): | def _create_stel(input_dim, output_dim, order): | ||||||
|     return super_core.SuperTransformerEncoderLayer( |     return super_core.SuperSequential( | ||||||
|         input_dim, |         super_core.SuperLinear(input_dim, output_dim), | ||||||
|         output_dim, |         super_core.SuperTransformerEncoderLayer( | ||||||
|         num_heads=spaces.Categorical(2, 4, 6), |             output_dim, | ||||||
|         mlp_hidden_multiplier=spaces.Categorical(1, 2, 4), |             num_heads=spaces.Categorical(2, 4, 6), | ||||||
|  |             mlp_hidden_multiplier=spaces.Categorical(1, 2, 4), | ||||||
|  |             order=order, | ||||||
|  |         ), | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @pytest.mark.parametrize("batch", (1, 2, 4)) | @pytest.mark.parametrize("batch", (1, 2, 4)) | ||||||
| @pytest.mark.parametrize("seq_dim", (1, 10, 30)) | @pytest.mark.parametrize("seq_dim", (1, 10, 30)) | ||||||
| @pytest.mark.parametrize("input_dim", (6, 12, 24, 27)) | @pytest.mark.parametrize("input_dim", (6, 12, 24, 27)) | ||||||
| def test_super_sequential(batch, seq_dim, input_dim): | @pytest.mark.parametrize( | ||||||
|  |     "order", (super_core.LayerOrder.PreNorm, super_core.LayerOrder.PostNorm) | ||||||
|  | ) | ||||||
|  | def test_super_sequential(batch, seq_dim, input_dim, order): | ||||||
|     out1_dim = spaces.Categorical(12, 24, 36) |     out1_dim = spaces.Categorical(12, 24, 36) | ||||||
|     out2_dim = spaces.Categorical(24, 36, 48) |     out2_dim = spaces.Categorical(24, 36, 48) | ||||||
|     out3_dim = spaces.Categorical(36, 72, 100) |     out3_dim = spaces.Categorical(36, 72, 100) | ||||||
|     layer1 = _create_stel(input_dim, out1_dim) |     layer1 = _create_stel(input_dim, out1_dim, order) | ||||||
|     layer2 = _create_stel(out1_dim, out2_dim) |     layer2 = _create_stel(out1_dim, out2_dim, order) | ||||||
|     layer3 = _create_stel(out2_dim, out3_dim) |     layer3 = _create_stel(out2_dim, out3_dim, order) | ||||||
|     model = super_core.SuperSequential(layer1, layer2, layer3) |     model = super_core.SuperSequential(layer1, layer2, layer3) | ||||||
|     print(model) |     print(model) | ||||||
|     model.apply_verbose(True) |     model.apply_verbose(True) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user