Commit 15dda79e3b
Parent: 379b904203
Author: D-X-Y
Date:   2021-03-24 05:33:52 -07:00

6 changed files with 60 additions and 58 deletions

@@ -1 +1 @@
-Subproject commit 0a0c6a3185ac6bcec38b756f039b9ccc64b41827
+Subproject commit 419629e4d2eefed52ceb207afb887a47aac732ca


@@ -46,7 +46,7 @@ _default_max_depth = 5
 DefaultSearchSpace = dict(
     d_feat=6,
     stem_dim=spaces.Categorical(*_get_list_mul(8, 16)),
-    embed_dims=_get_mul_specs(_get_list_mul(8, 16), _default_max_depth),
+    embed_dim=spaces.Categorical(*_get_list_mul(8, 16)),
     num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth),
     mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth),
     qkv_bias=True,
@@ -62,7 +62,7 @@ class SuperTransformer(super_core.SuperModule):
         self,
         d_feat: int = 6,
         stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"],
-        embed_dims: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dims"],
+        embed_dim: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dim"],
         num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"],
         mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[
             "mlp_hidden_multipliers"
@@ -73,7 +73,7 @@ class SuperTransformer(super_core.SuperModule):
         max_seq_len: int = 65,
     ):
         super(SuperTransformer, self).__init__()
-        self._embed_dims = embed_dims
+        self._embed_dim = embed_dim
         self._stem_dim = stem_dim
         self._num_heads = num_heads
         self._mlp_hidden_multipliers = mlp_hidden_multipliers
@@ -85,22 +85,15 @@ class SuperTransformer(super_core.SuperModule):
             d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop
         )
         # build the transformer encode layers -->> check params
-        _assert_types(embed_dims, (tuple, list))
         _assert_types(num_heads, (tuple, list))
         _assert_types(mlp_hidden_multipliers, (tuple, list))
-        num_layers = len(embed_dims)
-        assert (
-            num_layers == len(num_heads) == len(mlp_hidden_multipliers)
-        ), "{:} vs {:} vs {:}".format(
-            num_layers, len(num_heads), len(mlp_hidden_multipliers)
+        assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
+            len(num_heads), len(mlp_hidden_multipliers)
         )
         # build the transformer encode layers -->> backbone
-        layers, input_dim = [], stem_dim
-        for embed_dim, num_head, mlp_hidden_multiplier in zip(
-            embed_dims, num_heads, mlp_hidden_multipliers
-        ):
+        layers = []
+        for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers):
             layer = super_core.SuperTransformerEncoderLayer(
-                input_dim,
                 embed_dim,
                 num_head,
                 qkv_bias,
@@ -108,11 +101,12 @@ class SuperTransformer(super_core.SuperModule):
                 other_drop,
             )
             layers.append(layer)
-            input_dim = embed_dim
         self.backbone = super_core.SuperSequential(*layers)
 
         # the regression head
-        self.head = super_core.SuperLinear(self._embed_dims[-1], 1)
+        self.head = super_core.SuperSequential(
+            super_core.SuperLayerNorm1D(embed_dim), super_core.SuperLinear(embed_dim, 1)
+        )
 
         trunc_normal_(self.cls_token, std=0.02)
         self.apply(self._init_weights)
@@ -123,14 +117,16 @@ class SuperTransformer(super_core.SuperModule):
     @property
     def abstract_search_space(self):
         root_node = spaces.VirtualNode(id(self))
+        if not spaces.is_determined(self._stem_dim):
+            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
+        if not spaces.is_determined(self._stem_dim):
+            root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True))
         xdict = dict(
             input_embed=self.input_embed.abstract_search_space,
             pos_embed=self.pos_embed.abstract_search_space,
             backbone=self.backbone.abstract_search_space,
             head=self.head.abstract_search_space,
         )
-        if not spaces.is_determined(self._stem_dim):
-            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
         for key, space in xdict.items():
             if not spaces.is_determined(space):
                 root_node.append(key, space)
@@ -196,7 +192,7 @@ def get_transformer(config):
     model = SuperTransformer(
         d_feat=config.get("d_feat"),
         stem_dim=config.get("stem_dim"),
-        embed_dims=config.get("embed_dims"),
+        embed_dim=config.get("embed_dim"),
         num_heads=config.get("num_heads"),
         mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"),
         qkv_bias=config.get("qkv_bias"),


@@ -3,6 +3,7 @@
 #####################################################
 from .super_module import SuperRunMode
 from .super_module import IntSpaceType
+from .super_module import LayerOrder
 from .super_module import SuperModule
 from .super_container import SuperSequential
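
With this re-export, the layer-order enum is reachable through the same super_core namespace that the updated tests below rely on. A tiny reference sketch; the package import path is an assumption about how the library is laid out, not something this diff shows:

    from xlayers import super_core  # hypothetical import path

    order = super_core.LayerOrder.PreNorm  # or super_core.LayerOrder.PostNorm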


@@ -37,8 +37,7 @@ class SuperTransformerEncoderLayer(SuperModule):
     def __init__(
         self,
-        input_dim: IntSpaceType,
-        output_dim: IntSpaceType,
+        d_model: IntSpaceType,
         num_heads: IntSpaceType,
         qkv_bias: BoolSpaceType = False,
         mlp_hidden_multiplier: IntSpaceType = 4,
@@ -48,40 +47,37 @@ class SuperTransformerEncoderLayer(SuperModule):
     ):
         super(SuperTransformerEncoderLayer, self).__init__()
         mha = SuperAttention(
-            input_dim,
-            input_dim,
+            d_model,
+            d_model,
             num_heads=num_heads,
             qkv_bias=qkv_bias,
             attn_drop=drop,
             proj_drop=drop,
         )
-        drop1 = nn.Dropout(drop or 0.0)
-        norm1 = SuperLayerNorm1D(input_dim)
         mlp = SuperMLPv2(
-            input_dim,
+            d_model,
             hidden_multiplier=mlp_hidden_multiplier,
-            out_features=output_dim,
+            out_features=d_model,
             act_layer=act_layer,
             drop=drop,
         )
-        drop2 = nn.Dropout(drop or 0.0)
-        norm2 = SuperLayerNorm1D(output_dim)
         if order is LayerOrder.PreNorm:
-            self.norm1 = norm1
+            self.norm1 = SuperLayerNorm1D(d_model)
             self.mha = mha
-            self.drop1 = drop1
-            self.norm2 = norm2
+            self.drop1 = nn.Dropout(drop or 0.0)
+            self.norm2 = SuperLayerNorm1D(d_model)
             self.mlp = mlp
-            self.drop2 = drop2
-        elif order is LayerOrder.PostNoem:
+            self.drop2 = nn.Dropout(drop or 0.0)
+        elif order is LayerOrder.PostNorm:
             self.mha = mha
-            self.drop1 = drop1
-            self.norm1 = norm1
+            self.drop1 = nn.Dropout(drop or 0.0)
+            self.norm1 = SuperLayerNorm1D(d_model)
             self.mlp = mlp
-            self.drop2 = drop2
-            self.norm2 = norm2
+            self.drop2 = nn.Dropout(drop or 0.0)
+            self.norm2 = SuperLayerNorm1D(d_model)
         else:
             raise ValueError("Unknown order: {:}".format(order))
+        self._order = order
 
     @property
     def abstract_search_space(self):
@@ -108,18 +104,19 @@ class SuperTransformerEncoderLayer(SuperModule):
         return self.forward_raw(input)
 
     def forward_raw(self, input: torch.Tensor) -> torch.Tensor:
-        if order is LayerOrder.PreNorm:
+        if self._order is LayerOrder.PreNorm:
             x = self.norm1(input)
             x = x + self.drop1(self.mha(x))
             x = self.norm2(x)
             x = x + self.drop2(self.mlp(x))
-        elif order is LayerOrder.PostNoem:
+        elif self._order is LayerOrder.PostNorm:
             # multi-head attention
-            x = x + self.drop1(self.mha(input))
+            x = self.mha(input)
+            x = x + self.drop1(x)
             x = self.norm1(x)
             # feed-forward layer
             x = x + self.drop2(self.mlp(x))
             x = self.norm2(x)
         else:
-            raise ValueError("Unknown order: {:}".format(order))
+            raise ValueError("Unknown order: {:}".format(self._order))
         return x
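
As a quick usage reference for the refactored signature, a hedged sketch of driving the layer with both normalization orders. The argument names come from this diff; the concrete sizes, the dummy tensor, and the assumption that the default run mode dispatches to forward_raw are illustrative, not verified here.

    import torch

    d_model = 24
    x = torch.rand(2, 10, d_model)  # (batch, sequence, d_model) -- shapes are illustrative
    for order in (super_core.LayerOrder.PreNorm, super_core.LayerOrder.PostNorm):
        layer = super_core.SuperTransformerEncoderLayer(
            d_model,
            num_heads=4,
            mlp_hidden_multiplier=2,
            order=order,
        )
        out = layer(x)  # the output keeps the d_model width in either order

Since the layer no longer changes dimensionality on its own, callers that previously relied on separate input_dim/output_dim arguments now place an explicit SuperLinear in front of it, as the test changes below do.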


@@ -53,11 +53,13 @@ class TestSuperAttention(unittest.TestCase):
     @parameterized.expand([[6], [12], [24], [48]])
     def test_transformer_encoder(self, input_dim):
         output_dim = spaces.Categorical(12, 24, 36)
-        model = super_core.SuperTransformerEncoderLayer(
-            input_dim,
-            output_dim=output_dim,
-            num_heads=spaces.Categorical(2, 4, 6),
-            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+        model = super_core.SuperSequential(
+            super_core.SuperLinear(input_dim, output_dim),
+            super_core.SuperTransformerEncoderLayer(
+                output_dim,
+                num_heads=spaces.Categorical(2, 4, 6),
+                mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+            ),
         )
         print(model)
         model.apply_verbose(True)


@@ -36,25 +36,31 @@ def _internal_func(inputs, model):
     return abstract_child, outputs
 
 
-def _create_stel(input_dim, output_dim):
-    return super_core.SuperTransformerEncoderLayer(
-        input_dim,
-        output_dim,
-        num_heads=spaces.Categorical(2, 4, 6),
-        mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+def _create_stel(input_dim, output_dim, order):
+    return super_core.SuperSequential(
+        super_core.SuperLinear(input_dim, output_dim),
+        super_core.SuperTransformerEncoderLayer(
+            output_dim,
+            num_heads=spaces.Categorical(2, 4, 6),
+            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
+            order=order,
+        ),
     )
 
 
 @pytest.mark.parametrize("batch", (1, 2, 4))
 @pytest.mark.parametrize("seq_dim", (1, 10, 30))
 @pytest.mark.parametrize("input_dim", (6, 12, 24, 27))
-def test_super_sequential(batch, seq_dim, input_dim):
+@pytest.mark.parametrize(
+    "order", (super_core.LayerOrder.PreNorm, super_core.LayerOrder.PostNorm)
+)
+def test_super_sequential(batch, seq_dim, input_dim, order):
     out1_dim = spaces.Categorical(12, 24, 36)
     out2_dim = spaces.Categorical(24, 36, 48)
     out3_dim = spaces.Categorical(36, 72, 100)
-    layer1 = _create_stel(input_dim, out1_dim)
-    layer2 = _create_stel(out1_dim, out2_dim)
-    layer3 = _create_stel(out2_dim, out3_dim)
+    layer1 = _create_stel(input_dim, out1_dim, order)
+    layer2 = _create_stel(out1_dim, out2_dim, order)
+    layer3 = _create_stel(out2_dim, out3_dim, order)
     model = super_core.SuperSequential(layer1, layer2, layer3)
     print(model)
     model.apply_verbose(True)
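
The new order parametrization doubles the existing batch/seq_dim/input_dim grid, covering both normalization schemes. These cases can be selected with pytest's standard keyword filter, for example (test-file path omitted since it is not part of this diff):

    python -m pytest -k test_super_sequential -s

Because the refactored SuperTransformerEncoderLayer keeps its width fixed at d_model, _create_stel now wraps it in a SuperSequential with a leading SuperLinear to move from input_dim to output_dim, mirroring the unittest change above.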