From 15dda79e3b4cabcd78c53ca18d6ff1950878c1e8 Mon Sep 17 00:00:00 2001
From: D-X-Y <280835372@qq.com>
Date: Wed, 24 Mar 2021 05:33:52 -0700
Subject: [PATCH] Updates

---
 .latent-data/qlib                |  2 +-
 lib/trade_models/transformers.py | 34 +++++++++++--------------
 lib/xlayers/super_core.py        |  1 +
 lib/xlayers/super_transformer.py | 43 +++++++++++++++-----------------
 tests/test_super_att.py          | 12 +++++----
 tests/test_super_container.py    | 26 +++++++++++--------
 6 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/.latent-data/qlib b/.latent-data/qlib
index 0a0c6a3..419629e 160000
--- a/.latent-data/qlib
+++ b/.latent-data/qlib
@@ -1 +1 @@
-Subproject commit 0a0c6a3185ac6bcec38b756f039b9ccc64b41827
+Subproject commit 419629e4d2eefed52ceb207afb887a47aac732ca
diff --git a/lib/trade_models/transformers.py b/lib/trade_models/transformers.py
index 2aa3595..9d7d5d3 100644
--- a/lib/trade_models/transformers.py
+++ b/lib/trade_models/transformers.py
@@ -46,7 +46,7 @@ _default_max_depth = 5
 DefaultSearchSpace = dict(
     d_feat=6,
     stem_dim=spaces.Categorical(*_get_list_mul(8, 16)),
-    embed_dims=_get_mul_specs(_get_list_mul(8, 16), _default_max_depth),
+    embed_dim=spaces.Categorical(*_get_list_mul(8, 16)),
     num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth),
     mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth),
     qkv_bias=True,
@@ -62,7 +62,7 @@ class SuperTransformer(super_core.SuperModule):
         self,
         d_feat: int = 6,
         stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"],
-        embed_dims: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dims"],
+        embed_dim: super_core.IntSpaceType = DefaultSearchSpace["embed_dim"],
         num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"],
         mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[
             "mlp_hidden_multipliers"
@@ -73,7 +73,7 @@ class SuperTransformer(super_core.SuperModule):
         max_seq_len: int = 65,
     ):
         super(SuperTransformer, self).__init__()
-        self._embed_dims = embed_dims
+        self._embed_dim = embed_dim
         self._stem_dim = stem_dim
         self._num_heads = num_heads
         self._mlp_hidden_multipliers = mlp_hidden_multipliers
@@ -85,22 +85,15 @@
             d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop
         )
         # build the transformer encode layers -->> check params
-        _assert_types(embed_dims, (tuple, list))
         _assert_types(num_heads, (tuple, list))
         _assert_types(mlp_hidden_multipliers, (tuple, list))
-        num_layers = len(embed_dims)
-        assert (
-            num_layers == len(num_heads) == len(mlp_hidden_multipliers)
-        ), "{:} vs {:} vs {:}".format(
-            num_layers, len(num_heads), len(mlp_hidden_multipliers)
+        assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
+            len(num_heads), len(mlp_hidden_multipliers)
         )
         # build the transformer encode layers -->> backbone
-        layers, input_dim = [], stem_dim
-        for embed_dim, num_head, mlp_hidden_multiplier in zip(
-            embed_dims, num_heads, mlp_hidden_multipliers
-        ):
+        layers = []
+        for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers):
             layer = super_core.SuperTransformerEncoderLayer(
-                input_dim,
                 embed_dim,
                 num_head,
                 qkv_bias,
@@ -108,11 +101,12 @@
                 other_drop,
             )
             layers.append(layer)
-            input_dim = embed_dim
         self.backbone = super_core.SuperSequential(*layers)
 
         # the regression head
-        self.head = super_core.SuperLinear(self._embed_dims[-1], 1)
+        self.head = super_core.SuperSequential(
+            super_core.SuperLayerNorm1D(embed_dim), super_core.SuperLinear(embed_dim, 1)
+        )
 
         trunc_normal_(self.cls_token, std=0.02)
         self.apply(self._init_weights)
@@ -123,14 +117,16 @@
     @property
     def abstract_search_space(self):
         root_node = spaces.VirtualNode(id(self))
+        if not spaces.is_determined(self._stem_dim):
+            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
+        if not spaces.is_determined(self._embed_dim):
+            root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True))
         xdict = dict(
             input_embed=self.input_embed.abstract_search_space,
             pos_embed=self.pos_embed.abstract_search_space,
             backbone=self.backbone.abstract_search_space,
             head=self.head.abstract_search_space,
         )
-        if not spaces.is_determined(self._stem_dim):
-            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
         for key, space in xdict.items():
             if not spaces.is_determined(space):
                 root_node.append(key, space)
@@ -196,7 +192,7 @@ def get_transformer(config):
     model = SuperTransformer(
         d_feat=config.get("d_feat"),
         stem_dim=config.get("stem_dim"),
-        embed_dims=config.get("embed_dims"),
+        embed_dim=config.get("embed_dim"),
         num_heads=config.get("num_heads"),
         mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"),
         qkv_bias=config.get("qkv_bias"),
diff --git a/lib/xlayers/super_core.py b/lib/xlayers/super_core.py
index 5eb09cb..b1aa9e3 100644
--- a/lib/xlayers/super_core.py
+++ b/lib/xlayers/super_core.py
@@ -3,6 +3,7 @@
 #####################################################
 from .super_module import SuperRunMode
 from .super_module import IntSpaceType
+from .super_module import LayerOrder
 from .super_module import SuperModule
 
 from .super_container import SuperSequential
diff --git a/lib/xlayers/super_transformer.py b/lib/xlayers/super_transformer.py
index 72684da..f21ac54 100644
--- a/lib/xlayers/super_transformer.py
+++ b/lib/xlayers/super_transformer.py
@@ -37,8 +37,7 @@ class SuperTransformerEncoderLayer(SuperModule):
 
     def __init__(
         self,
-        input_dim: IntSpaceType,
-        output_dim: IntSpaceType,
+        d_model: IntSpaceType,
         num_heads: IntSpaceType,
         qkv_bias: BoolSpaceType = False,
         mlp_hidden_multiplier: IntSpaceType = 4,
@@ -48,40 +47,37 @@
     ):
         super(SuperTransformerEncoderLayer, self).__init__()
         mha = SuperAttention(
-            input_dim,
-            input_dim,
+            d_model,
+            d_model,
             num_heads=num_heads,
             qkv_bias=qkv_bias,
             attn_drop=drop,
             proj_drop=drop,
         )
-        drop1 = nn.Dropout(drop or 0.0)
-        norm1 = SuperLayerNorm1D(input_dim)
         mlp = SuperMLPv2(
-            input_dim,
+            d_model,
            hidden_multiplier=mlp_hidden_multiplier,
-            out_features=output_dim,
+            out_features=d_model,
             act_layer=act_layer,
             drop=drop,
         )
-        drop2 = nn.Dropout(drop or 0.0)
-        norm2 = SuperLayerNorm1D(output_dim)
 
         if order is LayerOrder.PreNorm:
-            self.norm1 = norm1
+            self.norm1 = SuperLayerNorm1D(d_model)
             self.mha = mha
-            self.drop1 = drop1
-            self.norm2 = norm2
+            self.drop1 = nn.Dropout(drop or 0.0)
+            self.norm2 = SuperLayerNorm1D(d_model)
             self.mlp = mlp
-            self.drop2 = drop2
-        elif order is LayerOrder.PostNoem:
+            self.drop2 = nn.Dropout(drop or 0.0)
+        elif order is LayerOrder.PostNorm:
             self.mha = mha
-            self.drop1 = drop1
-            self.norm1 = norm1
+            self.drop1 = nn.Dropout(drop or 0.0)
+            self.norm1 = SuperLayerNorm1D(d_model)
             self.mlp = mlp
-            self.drop2 = drop2
-            self.norm2 = norm2
+            self.drop2 = nn.Dropout(drop or 0.0)
+            self.norm2 = SuperLayerNorm1D(d_model)
         else:
             raise ValueError("Unknown order: {:}".format(order))
+        self._order = order
 
     @property
     def abstract_search_space(self):
@@ -108,18 +104,19 @@ class SuperTransformerEncoderLayer(SuperModule):
         return self.forward_raw(input)
 
     def forward_raw(self, input: torch.Tensor) -> torch.Tensor:
-        if order is LayerOrder.PreNorm:
+        if self._order is LayerOrder.PreNorm:
             x = self.norm1(input)
             x = x + self.drop1(self.mha(x))
             x = self.norm2(x)
             x = x + self.drop2(self.mlp(x))
-        elif order is LayerOrder.PostNoem:
+        elif self._order is LayerOrder.PostNorm:
             # multi-head attention
-            x = x + self.drop1(self.mha(input))
+            x = self.mha(input)
+            x = input + self.drop1(x)
             x = self.norm1(x)
             # feed-forward layer
             x = x + self.drop2(self.mlp(x))
             x = self.norm2(x)
         else:
-            raise ValueError("Unknown order: {:}".format(order))
+            raise ValueError("Unknown order: {:}".format(self._order))
         return x
diff --git a/tests/test_super_att.py b/tests/test_super_att.py
index c4a0900..dfb2cdf 100644
--- a/tests/test_super_att.py
+++ b/tests/test_super_att.py
@@ -53,11 +53,13 @@ class TestSuperAttention(unittest.TestCase):
     @parameterized.expand([[6], [12], [24], [48]])
     def test_transformer_encoder(self, input_dim):
         output_dim = spaces.Categorical(12, 24, 36)
-        model = super_core.SuperTransformerEncoderLayer(
-            input_dim,
-            output_dim=output_dim,
-            num_heads=spaces.Categorical(2, 4, 6),
-            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+        model = super_core.SuperSequential(
+            super_core.SuperLinear(input_dim, output_dim),
+            super_core.SuperTransformerEncoderLayer(
+                output_dim,
+                num_heads=spaces.Categorical(2, 4, 6),
+                mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+            ),
         )
         print(model)
         model.apply_verbose(True)
diff --git a/tests/test_super_container.py b/tests/test_super_container.py
index 56bb77a..d8fd271 100644
--- a/tests/test_super_container.py
+++ b/tests/test_super_container.py
@@ -36,25 +36,31 @@ def _internal_func(inputs, model):
     return abstract_child, outputs
 
 
-def _create_stel(input_dim, output_dim):
-    return super_core.SuperTransformerEncoderLayer(
-        input_dim,
-        output_dim,
-        num_heads=spaces.Categorical(2, 4, 6),
-        mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+def _create_stel(input_dim, output_dim, order):
+    return super_core.SuperSequential(
+        super_core.SuperLinear(input_dim, output_dim),
+        super_core.SuperTransformerEncoderLayer(
+            output_dim,
+            num_heads=spaces.Categorical(2, 4, 6),
+            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+            order=order,
+        ),
     )
 
 
 @pytest.mark.parametrize("batch", (1, 2, 4))
 @pytest.mark.parametrize("seq_dim", (1, 10, 30))
 @pytest.mark.parametrize("input_dim", (6, 12, 24, 27))
-def test_super_sequential(batch, seq_dim, input_dim):
+@pytest.mark.parametrize(
+    "order", (super_core.LayerOrder.PreNorm, super_core.LayerOrder.PostNorm)
+)
+def test_super_sequential(batch, seq_dim, input_dim, order):
     out1_dim = spaces.Categorical(12, 24, 36)
     out2_dim = spaces.Categorical(24, 36, 48)
     out3_dim = spaces.Categorical(36, 72, 100)
-    layer1 = _create_stel(input_dim, out1_dim)
-    layer2 = _create_stel(out1_dim, out2_dim)
-    layer3 = _create_stel(out2_dim, out3_dim)
+    layer1 = _create_stel(input_dim, out1_dim, order)
+    layer2 = _create_stel(out1_dim, out2_dim, order)
+    layer3 = _create_stel(out2_dim, out3_dim, order)
     model = super_core.SuperSequential(layer1, layer2, layer3)
     print(model)
     model.apply_verbose(True)
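
Reviewer note (not part of the patch): after this change, every encoder layer and the
regression head share one embed_dim search space instead of the old per-layer embed_dims
list. A minimal construction sketch follows; the import paths (spaces, trade_models.transformers)
are assumptions based on the file locations in this repo and are not shown in the patch, and
stem_dim is kept equal to embed_dim here because the diff does not show any projection
between the stem output and the first encoder layer:

    import spaces                                            # lib/spaces (assumed import path)
    from trade_models.transformers import SuperTransformer   # lib/trade_models/transformers.py (assumed)

    width = spaces.Categorical(32, 64)        # one shared width candidate set
    net = SuperTransformer(
        d_feat=6,
        stem_dim=width,                       # stem width
        embed_dim=width,                      # shared by all encoder layers and the head
        num_heads=[4, 4, 4],                  # depth = 3, one head count per layer
        mlp_hidden_multipliers=[4, 4, 4],     # must match len(num_heads)
        qkv_bias=True,
    )
    print(net.abstract_search_space)          # now includes _stem_dim and _embed_dim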
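Similarly, SuperTransformerEncoderLayer now takes a single d_model (the layer no longer
changes width internally) plus an explicit LayerOrder. A usage sketch mirroring the updated
tests; the spaces / super_core import paths are assumptions, since the test imports are not
part of this diff:

    import spaces                              # search-space primitives (assumed import path)
    from xlayers import super_core             # lib/xlayers (assumed import path)

    d_model = spaces.Categorical(12, 24, 36)
    block = super_core.SuperSequential(
        super_core.SuperLinear(6, d_model),    # project raw features to d_model first
        super_core.SuperTransformerEncoderLayer(
            d_model,                           # single width replaces input_dim/output_dim
            num_heads=spaces.Categorical(2, 4, 6),
            qkv_bias=True,
            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
            order=super_core.LayerOrder.PostNorm,   # or super_core.LayerOrder.PreNorm
        ),
    )
    block.apply_verbose(True)                  # same debugging switch the tests use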