D-X-Y 2021-03-24 05:33:52 -07:00
parent 379b904203
commit 15dda79e3b
6 changed files with 60 additions and 58 deletions

@@ -1 +1 @@
Subproject commit 0a0c6a3185ac6bcec38b756f039b9ccc64b41827
Subproject commit 419629e4d2eefed52ceb207afb887a47aac732ca

View File

@@ -46,7 +46,7 @@ _default_max_depth = 5
DefaultSearchSpace = dict(
d_feat=6,
stem_dim=spaces.Categorical(*_get_list_mul(8, 16)),
embed_dims=_get_mul_specs(_get_list_mul(8, 16), _default_max_depth),
embed_dim=spaces.Categorical(*_get_list_mul(8, 16)),
num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth),
mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth),
qkv_bias=True,
@@ -62,7 +62,7 @@ class SuperTransformer(super_core.SuperModule):
self,
d_feat: int = 6,
stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"],
embed_dims: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dims"],
embed_dim: super_core.IntSpaceType = DefaultSearchSpace["embed_dim"],
num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"],
mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[
"mlp_hidden_multipliers"
@@ -73,7 +73,7 @@ class SuperTransformer(super_core.SuperModule):
max_seq_len: int = 65,
):
super(SuperTransformer, self).__init__()
self._embed_dims = embed_dims
self._embed_dim = embed_dim
self._stem_dim = stem_dim
self._num_heads = num_heads
self._mlp_hidden_multipliers = mlp_hidden_multipliers
@@ -85,22 +85,15 @@ class SuperTransformer(super_core.SuperModule):
d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop
)
# build the transformer encode layers -->> check params
_assert_types(embed_dims, (tuple, list))
_assert_types(num_heads, (tuple, list))
_assert_types(mlp_hidden_multipliers, (tuple, list))
num_layers = len(embed_dims)
assert (
num_layers == len(num_heads) == len(mlp_hidden_multipliers)
), "{:} vs {:} vs {:}".format(
num_layers, len(num_heads), len(mlp_hidden_multipliers)
assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
len(num_heads), len(mlp_hidden_multipliers)
)
# build the transformer encode layers -->> backbone
layers, input_dim = [], stem_dim
for embed_dim, num_head, mlp_hidden_multiplier in zip(
embed_dims, num_heads, mlp_hidden_multipliers
):
layers = []
for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers):
layer = super_core.SuperTransformerEncoderLayer(
input_dim,
embed_dim,
num_head,
qkv_bias,
@@ -108,11 +101,12 @@ class SuperTransformer(super_core.SuperModule):
other_drop,
)
layers.append(layer)
input_dim = embed_dim
self.backbone = super_core.SuperSequential(*layers)
# the regression head
self.head = super_core.SuperLinear(self._embed_dims[-1], 1)
self.head = super_core.SuperSequential(
super_core.SuperLayerNorm1D(embed_dim), super_core.SuperLinear(embed_dim, 1)
)
trunc_normal_(self.cls_token, std=0.02)
self.apply(self._init_weights)
@@ -123,14 +117,16 @@ class SuperTransformer(super_core.SuperModule):
@property
def abstract_search_space(self):
root_node = spaces.VirtualNode(id(self))
if not spaces.is_determined(self._stem_dim):
root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
if not spaces.is_determined(self._embed_dim):
root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True))
xdict = dict(
input_embed=self.input_embed.abstract_search_space,
pos_embed=self.pos_embed.abstract_search_space,
backbone=self.backbone.abstract_search_space,
head=self.head.abstract_search_space,
)
if not spaces.is_determined(self._stem_dim):
root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
for key, space in xdict.items():
if not spaces.is_determined(space):
root_node.append(key, space)
@@ -196,7 +192,7 @@ def get_transformer(config):
model = SuperTransformer(
d_feat=config.get("d_feat"),
stem_dim=config.get("stem_dim"),
embed_dims=config.get("embed_dims"),
embed_dim=config.get("embed_dim"),
num_heads=config.get("num_heads"),
mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"),
qkv_bias=config.get("qkv_bias"),
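A minimal usage sketch of the refactored constructor: `embed_dims` becomes a single shared `embed_dim` space and the layer count is now inferred from `num_heads`. The `spaces` import name follows the code above; the module that defines `SuperTransformer` is not named in this diff, so it is assumed to be importable.

import spaces  # search-space primitives used above (package path assumed)

# assumes SuperTransformer from the file shown above is importable
model = SuperTransformer(
    d_feat=6,
    stem_dim=spaces.Categorical(32, 48, 64),
    embed_dim=spaces.Categorical(32, 48, 64),  # one shared space replaces the per-layer embed_dims list
    num_heads=[spaces.Categorical(2, 4) for _ in range(3)],  # three encoder layers
    mlp_hidden_multipliers=[spaces.Categorical(1, 2) for _ in range(3)],
    qkv_bias=True,
)
print(model)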

View File

@@ -3,6 +3,7 @@
#####################################################
from .super_module import SuperRunMode
from .super_module import IntSpaceType
from .super_module import LayerOrder
from .super_module import SuperModule
from .super_container import SuperSequential
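With `LayerOrder` exported from the package, callers can pick the normalization placement explicitly; a tiny sketch, assuming `super_core` is the import name as used elsewhere in this commit:

import super_core

pre = super_core.LayerOrder.PreNorm    # normalize before attention/MLP
post = super_core.LayerOrder.PostNorm  # normalize after the residual add
assert pre is not post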

View File

@@ -37,8 +37,7 @@ class SuperTransformerEncoderLayer(SuperModule):
def __init__(
self,
input_dim: IntSpaceType,
output_dim: IntSpaceType,
d_model: IntSpaceType,
num_heads: IntSpaceType,
qkv_bias: BoolSpaceType = False,
mlp_hidden_multiplier: IntSpaceType = 4,
@@ -48,40 +47,37 @@ class SuperTransformerEncoderLayer(SuperModule):
):
super(SuperTransformerEncoderLayer, self).__init__()
mha = SuperAttention(
input_dim,
input_dim,
d_model,
d_model,
num_heads=num_heads,
qkv_bias=qkv_bias,
attn_drop=drop,
proj_drop=drop,
)
drop1 = nn.Dropout(drop or 0.0)
norm1 = SuperLayerNorm1D(input_dim)
mlp = SuperMLPv2(
input_dim,
d_model,
hidden_multiplier=mlp_hidden_multiplier,
out_features=output_dim,
out_features=d_model,
act_layer=act_layer,
drop=drop,
)
drop2 = nn.Dropout(drop or 0.0)
norm2 = SuperLayerNorm1D(output_dim)
if order is LayerOrder.PreNorm:
self.norm1 = norm1
self.norm1 = SuperLayerNorm1D(d_model)
self.mha = mha
self.drop1 = drop1
self.norm2 = norm2
self.drop1 = nn.Dropout(drop or 0.0)
self.norm2 = SuperLayerNorm1D(d_model)
self.mlp = mlp
self.drop2 = drop2
elif order is LayerOrder.PostNoem:
self.drop2 = nn.Dropout(drop or 0.0)
elif order is LayerOrder.PostNorm:
self.mha = mha
self.drop1 = drop1
self.norm1 = norm1
self.drop1 = nn.Dropout(drop or 0.0)
self.norm1 = SuperLayerNorm1D(d_model)
self.mlp = mlp
self.drop2 = drop2
self.norm2 = norm2
self.drop2 = nn.Dropout(drop or 0.0)
self.norm2 = SuperLayerNorm1D(d_model)
else:
raise ValueError("Unknown order: {:}".format(order))
self._order = order
@property
def abstract_search_space(self):
@@ -108,18 +104,19 @@ class SuperTransformerEncoderLayer(SuperModule):
return self.forward_raw(input)
def forward_raw(self, input: torch.Tensor) -> torch.Tensor:
if order is LayerOrder.PreNorm:
if self._order is LayerOrder.PreNorm:
x = self.norm1(input)
x = x + self.drop1(self.mha(x))
x = self.norm2(x)
x = x + self.drop2(self.mlp(x))
elif order is LayerOrder.PostNoem:
elif self._order is LayerOrder.PostNorm:
# multi-head attention
x = x + self.drop1(self.mha(input))
x = self.mha(input)
x = input + self.drop1(x)  # residual connection from the block input
x = self.norm1(x)
# feed-forward layer
x = x + self.drop2(self.mlp(x))
x = self.norm2(x)
else:
raise ValueError("Unknown order: {:}".format(order))
raise ValueError("Unknown order: {:}".format(self._order))
return x
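A sketch of the simplified layer interface: with a single `d_model` the block keeps the feature width unchanged, so input and output tensors share one shape. The `super_core` import name follows the code above, and the call assumes the default run mode dispatches to `forward_raw` as shown:

import torch
import super_core  # package path assumed

layer = super_core.SuperTransformerEncoderLayer(
    32,                                   # d_model: shared input/output width
    num_heads=4,
    qkv_bias=True,
    mlp_hidden_multiplier=2,
    order=super_core.LayerOrder.PreNorm,  # or LayerOrder.PostNorm
)
x = torch.rand(2, 10, 32)  # (batch, sequence, d_model)
y = layer(x)               # same (2, 10, 32) shape as the input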

View File

@@ -53,11 +53,13 @@ class TestSuperAttention(unittest.TestCase):
@parameterized.expand([[6], [12], [24], [48]])
def test_transformer_encoder(self, input_dim):
output_dim = spaces.Categorical(12, 24, 36)
model = super_core.SuperTransformerEncoderLayer(
input_dim,
output_dim=output_dim,
num_heads=spaces.Categorical(2, 4, 6),
mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
model = super_core.SuperSequential(
super_core.SuperLinear(input_dim, output_dim),
super_core.SuperTransformerEncoderLayer(
output_dim,
num_heads=spaces.Categorical(2, 4, 6),
mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
),
)
print(model)
model.apply_verbose(True)

View File

@@ -36,25 +36,31 @@ def _internal_func(inputs, model):
return abstract_child, outputs
def _create_stel(input_dim, output_dim):
return super_core.SuperTransformerEncoderLayer(
input_dim,
output_dim,
num_heads=spaces.Categorical(2, 4, 6),
mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
def _create_stel(input_dim, output_dim, order):
return super_core.SuperSequential(
super_core.SuperLinear(input_dim, output_dim),
super_core.SuperTransformerEncoderLayer(
output_dim,
num_heads=spaces.Categorical(2, 4, 6),
mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
order=order,
),
)
@pytest.mark.parametrize("batch", (1, 2, 4))
@pytest.mark.parametrize("seq_dim", (1, 10, 30))
@pytest.mark.parametrize("input_dim", (6, 12, 24, 27))
def test_super_sequential(batch, seq_dim, input_dim):
@pytest.mark.parametrize(
"order", (super_core.LayerOrder.PreNorm, super_core.LayerOrder.PostNorm)
)
def test_super_sequential(batch, seq_dim, input_dim, order):
out1_dim = spaces.Categorical(12, 24, 36)
out2_dim = spaces.Categorical(24, 36, 48)
out3_dim = spaces.Categorical(36, 72, 100)
layer1 = _create_stel(input_dim, out1_dim)
layer2 = _create_stel(out1_dim, out2_dim)
layer3 = _create_stel(out2_dim, out3_dim)
layer1 = _create_stel(input_dim, out1_dim, order)
layer2 = _create_stel(out1_dim, out2_dim, order)
layer3 = _create_stel(out2_dim, out3_dim, order)
model = super_core.SuperSequential(layer1, layer2, layer3)
print(model)
model.apply_verbose(True)
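Because the encoder layer no longer changes the feature width, the tests place a `SuperLinear` projection in front of it whenever a block has to move between embedding sizes. A minimal sketch of that pattern; import names follow the code above, and `projected_encoder` is a hypothetical helper mirroring `_create_stel`:

import spaces
import super_core  # package paths assumed

def projected_encoder(input_dim, output_dim, order):
    # project to the block's width, then apply a width-preserving encoder layer
    return super_core.SuperSequential(
        super_core.SuperLinear(input_dim, output_dim),
        super_core.SuperTransformerEncoderLayer(
            output_dim,
            num_heads=spaces.Categorical(2, 4),
            mlp_hidden_multiplier=spaces.Categorical(1, 2),
            order=order,
        ),
    )

block = projected_encoder(6, spaces.Categorical(12, 24), super_core.LayerOrder.PostNorm)
print(block)
block.apply_verbose(True)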